]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
VFS: use wait_var_event for waiting in d_alloc_parallel()
authorNeilBrown <neil@brown.name>
Thu, 30 Apr 2026 19:42:43 +0000 (15:42 -0400)
committerAl Viro <viro@zeniv.linux.org.uk>
Fri, 5 Jun 2026 04:34:54 +0000 (00:34 -0400)
Parallel lookup starts with a call of d_alloc_parallel().  That primitive
either returns a matching hashed dentry or allocates a new one in the
in-lookup state and returns it to the caller.  Once the caller is done
with lookup, it indicates so either by call of d_{splice_alias,add}()
or by call of d_lookup_done(); at that point dentry leaves the in-lookup
state.

If d_alloc_parallel() finds a matching in-lookup dentry, it must wait for
that dentry to leave the in-lookup state, one way or another.  Currently
that is done by supplying a wait_queue_head to d_alloc_parallel().  If
d_alloc_parallel()
creates a new in-lookup dentry, the address of that wait_queue_head is stored
in ->d_wait of the new dentry and stays there while it's in the in-lookup state;
subsequent d_alloc_parallel() will wait on the queue found in the matching
in-lookup dentry.  Transition out of in-lookup state wakes waiters on that
queue (if any).

That works, but the calling conventions are inconvenient - the caller must
supply wait_queue_head and make sure that it survives at least until the new
in-lookup dentry leaves the in-lookup state.  That amounts to boilerplate
in the d_alloc_parallel() callers that are followed by a call of d_lookup_done()
in the same function; in cases like nfs asynchronous unlink it gets worse than
that.

This patch changes d_alloc_parallel() to use wake_up_var_locked() to
wake up waiters, and wait_var_event_spinlock() to wait.  dentry->d_lock
is used for synchronisation as it is already held at the relevant
times.

That eliminates the need of caller-supplied wait_queue_head, simplifying
the calling conventions.  Better yet, we only need one bit of information
stored in dentry itself: whether there are any waiters to be woken up,
and that can be easily stored in ->d_flags; ->d_wait goes away.

The reason we need that bit (DCACHE_LOOKUP_WAITERS) is that with wait_var
machinery the queues are shared with all kinds of stuff and there's
no way to tell if any of the waiters have anything to do with our dentry;
most of the time none of them will be relevant, so we need to avoid the
pointless wakeups.

Another benefit of the new scheme comes from the fact that wakeups
have to be done outside of write-side critical areas of ->i_dir_seq;
with the old scheme we need to carry the value picked from ->d_wait from
__d_lookup_unhash() to the place where we actually wake the waiters up.
Now we can just leave DCACHE_LOOKUP_WAITERS in ->d_flags until we get
to doing wakeups - that's done within the same ->d_lock scope, so we
are fine; new bit is accessed only under ->d_lock and it's seen only
on dentries with DCACHE_PAR_LOOKUP in ->d_flags.

__d_lookup_unhash() no longer needs to re-init ->d_lru.  That was
previously shared (in a union) with ->d_wait but ->d_wait is now gone
so it no longer corrupts ->d_lru.

Co-developed-by: Al Viro <viro@zeniv.linux.org.uk> # saner handling of flags
Signed-off-by: NeilBrown <neil@brown.name>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
12 files changed:
Documentation/filesystems/porting.rst
fs/afs/dir_silly.c
fs/dcache.c
fs/fuse/readdir.c
fs/namei.c
fs/nfs/dir.c
fs/nfs/unlink.c
fs/proc/base.c
fs/proc/proc_sysctl.c
fs/smb/client/readdir.c
include/linux/dcache.h
include/linux/nfs_xdr.h

index fdf074429cd3ab7a4595182926edaba92d331a65..36fecc7a3d972c7e5659fd4c3c9a2cd4117372d3 100644 (file)
@@ -1385,3 +1385,9 @@ for_each_alias(dentry, inode) instead of hlist_for_each_entry; better
 yet, see if any of the exported primitives could be used instead of
 the entire loop.  You still need to hold ->i_lock of the inode over
 either form of manual loop.
+
+---
+
+**mandatory**
+
+d_alloc_parallel() no longer requires a waitqueue_head.
index a748fd133fafa613fee5bb1bf37ad83798456e14..982bb6ec15f03b6c036a418aabd469745070c849 100644 (file)
@@ -248,13 +248,11 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode)
        struct dentry *alias;
        int ret;
 
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-
        _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
 
        down_read(&dvnode->rmdir_lock);
 
-       alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name, &wq);
+       alias = d_alloc_parallel(dentry->d_parent, &dentry->d_name);
        if (IS_ERR(alias)) {
                up_read(&dvnode->rmdir_lock);
                return 0;
index 2c61aeea41f45df05bf335a28e4e54a16583f056..0aff2c510beb734b517a6354d8096fda355d97ad 100644 (file)
@@ -2250,8 +2250,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
                return found;
        }
        if (d_in_lookup(dentry)) {
-               found = d_alloc_parallel(dentry->d_parent, name,
-                                       dentry->d_wait);
+               found = d_alloc_parallel(dentry->d_parent, name);
                if (IS_ERR(found) || !d_in_lookup(found)) {
                        iput(inode);
                        return found;
@@ -2638,32 +2637,24 @@ static inline unsigned start_dir_add(struct inode *dir)
        }
 }
 
-static inline void end_dir_add(struct inode *dir, unsigned int n,
-                              wait_queue_head_t *d_wait)
+static inline void end_dir_add(struct inode *dir, unsigned int n)
 {
        smp_store_release(&dir->i_dir_seq, n + 2);
        preempt_enable_nested();
-       if (wq_has_sleeper(d_wait))
-               wake_up_all(d_wait);
 }
 
 static void d_wait_lookup(struct dentry *dentry)
 {
-       if (d_in_lookup(dentry)) {
-               DECLARE_WAITQUEUE(wait, current);
-               add_wait_queue(dentry->d_wait, &wait);
-               do {
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       spin_unlock(&dentry->d_lock);
-                       schedule();
-                       spin_lock(&dentry->d_lock);
-               } while (d_in_lookup(dentry));
+       if (likely(d_in_lookup(dentry))) {
+               dentry->d_flags |= DCACHE_LOOKUP_WAITERS;
+               wait_var_event_spinlock(&dentry->d_flags,
+                                       !d_in_lookup(dentry),
+                                       &dentry->d_lock);
        }
 }
 
 struct dentry *d_alloc_parallel(struct dentry *parent,
-                               const struct qstr *name,
-                               wait_queue_head_t *wq)
+                               const struct qstr *name)
 {
        unsigned int hash = name->hash;
        struct hlist_bl_head *b = in_lookup_hash(parent, hash);
@@ -2766,7 +2757,6 @@ retry:
                return dentry;
        }
        rcu_read_unlock();
-       new->d_wait = wq;
        hlist_bl_add_head(&new->d_in_lookup_hash, b);
        hlist_bl_unlock(b);
        return new;
@@ -2778,13 +2768,26 @@ mismatch:
 EXPORT_SYMBOL(d_alloc_parallel);
 
 /*
- * - Unhash the dentry
- * - Retrieve and clear the waitqueue head in dentry
- * - Return the waitqueue head
+ * Move dentry from in-lookup state to busy-negative one.
+ *
+ * From now on d_in_lookup(dentry) will return false and dentry is gone from
+ * in-lookup hash.
+ *
+ * Anyone who had been waiting on it in d_alloc_parallel() is free to
+ * proceed after that.  Note that waking such waiters up is left to
+ * the callers; PREEMPT_RT kernels can't have that wakeup done while
+ * in write-side critical area for ->i_dir_seq, so it's done by calling
+ * __d_wake_in_lookup_waiters() once it's safe to do so.
+ *
+ * Both __d_lookup_unhash() and __d_wake_in_lookup_waiters() should
+ * be called within the same ->d_lock scope.  PAR_LOOKUP is cleared
+ * here, while LOOKUP_WAITERS (set by somebody finding dentry in
+ * the in-lookup hash and settling down to wait) is checked and cleared
+ * in __d_wake_in_lookup_waiters().  Both are gone by the end of
+ * ->d_lock scope.
  */
-static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
+static void __d_lookup_unhash(struct dentry *dentry)
 {
-       wait_queue_head_t *d_wait;
        struct hlist_bl_head *b;
 
        lockdep_assert_held(&dentry->d_lock);
@@ -2793,18 +2796,23 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
        hlist_bl_lock(b);
        dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
        __hlist_bl_del(&dentry->d_in_lookup_hash);
-       d_wait = dentry->d_wait;
-       dentry->d_wait = NULL;
        hlist_bl_unlock(b);
        dentry->waiters = NULL;
-       INIT_LIST_HEAD(&dentry->d_lru);
-       return d_wait;
+}
+
+static inline void __d_wake_in_lookup_waiters(struct dentry *dentry)
+{
+       if (dentry->d_flags & DCACHE_LOOKUP_WAITERS) {
+               wake_up_var_locked(&dentry->d_flags, &dentry->d_lock);
+               dentry->d_flags &= ~DCACHE_LOOKUP_WAITERS;
+       }
 }
 
 void __d_lookup_unhash_wake(struct dentry *dentry)
 {
        spin_lock(&dentry->d_lock);
-       wake_up_all(__d_lookup_unhash(dentry));
+       __d_lookup_unhash(dentry);
+       __d_wake_in_lookup_waiters(dentry);
        spin_unlock(&dentry->d_lock);
 }
 EXPORT_SYMBOL(__d_lookup_unhash_wake);
@@ -2814,14 +2822,13 @@ EXPORT_SYMBOL(__d_lookup_unhash_wake);
 static inline void __d_add(struct dentry *dentry, struct inode *inode,
                           const struct dentry_operations *ops)
 {
-       wait_queue_head_t *d_wait;
        struct inode *dir = NULL;
        unsigned n;
        spin_lock(&dentry->d_lock);
        if (unlikely(d_in_lookup(dentry))) {
                dir = dentry->d_parent->d_inode;
                n = start_dir_add(dir);
-               d_wait = __d_lookup_unhash(dentry);
+               __d_lookup_unhash(dentry);
        }
        if (unlikely(ops))
                d_set_d_op(dentry, ops);
@@ -2834,8 +2841,10 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode,
                fsnotify_update_flags(dentry);
        }
        __d_rehash(dentry);
-       if (dir)
-               end_dir_add(dir, n, d_wait);
+       if (dir) {
+               end_dir_add(dir, n);
+               __d_wake_in_lookup_waiters(dentry);
+       }
        spin_unlock(&dentry->d_lock);
        if (inode)
                spin_unlock(&inode->i_lock);
@@ -2948,7 +2957,6 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
                     bool exchange)
 {
        struct dentry *old_parent, *p;
-       wait_queue_head_t *d_wait;
        struct inode *dir = NULL;
        unsigned n;
 
@@ -2979,7 +2987,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
        if (unlikely(d_in_lookup(target))) {
                dir = target->d_parent->d_inode;
                n = start_dir_add(dir);
-               d_wait = __d_lookup_unhash(target);
+               __d_lookup_unhash(target);
        }
 
        write_seqcount_begin(&dentry->d_seq);
@@ -3018,9 +3026,10 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
        write_seqcount_end(&target->d_seq);
        write_seqcount_end(&dentry->d_seq);
 
-       if (dir)
-               end_dir_add(dir, n, d_wait);
-
+       if (dir) {
+               end_dir_add(dir, n);
+               __d_wake_in_lookup_waiters(target);
+       }
        if (dentry->d_parent != old_parent)
                spin_unlock(&dentry->d_parent->d_lock);
        if (dentry != old_parent)
index db5ae8ec10305aa86bba536b80e3b1ba6d81c778..a2361f1d99059297eeeaa3ce4d93cb938b2879e8 100644 (file)
@@ -164,7 +164,6 @@ static int fuse_direntplus_link(struct file *file,
        struct inode *dir = d_inode(parent);
        struct fuse_conn *fc;
        struct inode *inode;
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
        int epoch;
 
        if (!o->nodeid) {
@@ -201,7 +200,7 @@ static int fuse_direntplus_link(struct file *file,
        dentry = d_lookup(parent, &name);
        if (!dentry) {
 retry:
-               dentry = d_alloc_parallel(parent, &name, &wq);
+               dentry = d_alloc_parallel(parent, &name);
                if (IS_ERR(dentry))
                        return PTR_ERR(dentry);
        }
index c7fac83c9a85ef250bb424af0a91d581ee9f4919..ebde3a35746c1dda7bae2ca2b35593660c52527f 100644 (file)
@@ -1891,13 +1891,12 @@ static struct dentry *__lookup_slow(const struct qstr *name,
 {
        struct dentry *dentry, *old;
        struct inode *inode = dir->d_inode;
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 
        /* Don't go there if it's already dead */
        if (unlikely(IS_DEADDIR(inode)))
                return ERR_PTR(-ENOENT);
 again:
-       dentry = d_alloc_parallel(dir, name, &wq);
+       dentry = d_alloc_parallel(dir, name);
        if (IS_ERR(dentry))
                return dentry;
        if (unlikely(!d_in_lookup(dentry))) {
@@ -4414,7 +4413,6 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
        struct dentry *dentry;
        int error, create_error = 0;
        umode_t mode = op->mode;
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 
        if (unlikely(IS_DEADDIR(dir_inode)))
                return ERR_PTR(-ENOENT);
@@ -4423,7 +4421,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
        dentry = d_lookup(dir, &nd->last);
        for (;;) {
                if (!dentry) {
-                       dentry = d_alloc_parallel(dir, &nd->last, &wq);
+                       dentry = d_alloc_parallel(dir, &nd->last);
                        if (IS_ERR(dentry))
                                return dentry;
                }
index e9ce1883288c55bbf24671c1e4bd1a61a694ae3f..9580af999d703971dae66bd6c7d3bedff8e027cf 100644 (file)
@@ -726,7 +726,6 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
                unsigned long dir_verifier)
 {
        struct qstr filename = QSTR_INIT(entry->name, entry->len);
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
        struct dentry *dentry;
        struct dentry *alias;
        struct inode *inode;
@@ -755,7 +754,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
        dentry = d_lookup(parent, &filename);
 again:
        if (!dentry) {
-               dentry = d_alloc_parallel(parent, &filename, &wq);
+               dentry = d_alloc_parallel(parent, &filename);
                if (IS_ERR(dentry))
                        return;
        }
@@ -2106,7 +2105,6 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
                    struct file *file, unsigned open_flags,
                    umode_t mode)
 {
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
        struct nfs_open_context *ctx;
        struct dentry *res;
        struct iattr attr = { .ia_valid = ATTR_OPEN };
@@ -2162,7 +2160,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
                d_drop(dentry);
                switched = true;
                dentry = d_alloc_parallel(dentry->d_parent,
-                                         &dentry->d_name, &wq);
+                                         &dentry->d_name);
                if (IS_ERR(dentry))
                        return PTR_ERR(dentry);
                if (unlikely(!d_in_lookup(dentry)))
index df3ca4669df696a0e6d515cab1f36910a624fc91..43ea897943c025e03ce477d4f70e16bb31f8863a 100644 (file)
@@ -124,7 +124,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nf
        struct dentry *alias;
 
        down_read_non_owner(&NFS_I(dir)->rmdir_sem);
-       alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
+       alias = d_alloc_parallel(dentry->d_parent, &data->args.name);
        if (IS_ERR(alias)) {
                up_read_non_owner(&NFS_I(dir)->rmdir_sem);
                return 0;
@@ -185,7 +185,6 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
 
        data->cred = get_current_cred();
        data->res.dir_attr = &data->dir_attr;
-       init_waitqueue_head(&data->wq);
 
        status = -EBUSY;
        spin_lock(&dentry->d_lock);
index d9acfa89c894bd1608580331e1d5b3018c59123b..d55a4b603188afaf0a0d2d58078bfc1a8a2a6680 100644 (file)
@@ -2132,8 +2132,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
                goto end_instantiate;
 
        if (!child) {
-               DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-               child = d_alloc_parallel(dir, &qname, &wq);
+               child = d_alloc_parallel(dir, &qname);
                if (IS_ERR(child))
                        goto end_instantiate;
                if (d_in_lookup(child)) {
index 49ab74e0bfde31ae99d489aede6a5dc5f3c3ea71..04a382178c657b5af201a838fa15703d9eea9c7b 100644 (file)
@@ -692,8 +692,7 @@ static bool proc_sys_fill_cache(struct file *file,
 
        child = d_lookup(dir, &qname);
        if (!child) {
-               DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-               child = d_alloc_parallel(dir, &qname, &wq);
+               child = d_alloc_parallel(dir, &qname);
                if (IS_ERR(child))
                        return false;
                if (d_in_lookup(child)) {
index e860fa08b5e3080ec2023762881e2b552a80b5b3..1ff77f3d1de09506ef3fd4d8a683dbd06a9bf0f6 100644 (file)
@@ -73,7 +73,6 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions;
        bool reparse_need_reval = false;
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
        int rc;
 
        cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
@@ -105,7 +104,7 @@ retry:
                    (fattr->cf_flags & CIFS_FATTR_NEED_REVAL))
                        return;
 
-               dentry = d_alloc_parallel(parent, name, &wq);
+               dentry = d_alloc_parallel(parent, name);
        }
        if (IS_ERR(dentry))
                return;
index 2577c05f84ec765f159fa9dca35a62ccd3db9d8e..97a887be150a4917cb03f467ab69e0403eed5af4 100644 (file)
@@ -116,10 +116,7 @@ struct dentry {
                                         * possible!
                                         */
 
-       union {
-               struct list_head d_lru;         /* LRU list */
-               wait_queue_head_t *d_wait;      /* in-lookup ones only */
-       };
+       struct list_head d_lru;         /* LRU list */
        struct hlist_node d_sib;        /* child of parent list */
        struct hlist_head d_children;   /* our children */
        /*
@@ -210,6 +207,9 @@ enum dentry_flags {
        DCACHE_REFERENCED               = BIT(6),       /* Recently used, don't discard. */
        DCACHE_DONTCACHE                = BIT(7),       /* Purge from memory on final dput() */
        DCACHE_CANT_MOUNT               = BIT(8),
+       DCACHE_LOOKUP_WAITERS           = BIT(9),       /* A thread is waiting for
+                                                        * PAR_LOOKUP to clear
+                                                        */
        DCACHE_SHRINK_LIST              = BIT(10),
        DCACHE_OP_WEAK_REVALIDATE       = BIT(11),
        /*
@@ -256,8 +256,7 @@ extern void d_delete(struct dentry *);
 /* allocate/de-allocate */
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
 extern struct dentry * d_alloc_anon(struct super_block *);
-extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
-                                       wait_queue_head_t *);
+extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 /* weird procfs mess; *NOT* exported */
 extern struct dentry * d_splice_alias_ops(struct inode *, struct dentry *,
index fcbd21b5685f46136a210c8e11c20a54d6ed9dad..6aced49d5f00e2006c6d518966b937936b8c4768 100644 (file)
@@ -1743,7 +1743,6 @@ struct nfs_unlinkdata {
        struct nfs_removeargs args;
        struct nfs_removeres res;
        struct dentry *dentry;
-       wait_queue_head_t wq;
        const struct cred *cred;
        struct nfs_fattr dir_attr;
        long timeout;