]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
get rid of busy-waiting in shrink_dcache_tree()
authorAl Viro <viro@zeniv.linux.org.uk>
Wed, 21 Jan 2026 23:17:12 +0000 (18:17 -0500)
committerAl Viro <viro@zeniv.linux.org.uk>
Sat, 4 Apr 2026 08:03:56 +0000 (04:03 -0400)
If shrink_dcache_tree() runs into a potential victim that is already
dying, it must wait for that dentry to go away.  To avoid busy-waiting
we need some object to wait on and a way for dentry_unlist() to see that
we need to be notified.

The obvious place for the object to wait on would be on our stack frame.
We will store a pointer to that object (struct completion_list) in victim
dentry; if there's more than one thread wanting to wait for the same
dentry to finish dying, we'll have their instances linked into a list,
with reference in dentry pointing to the head of that list.

* new object - struct completion_list.  A pair of struct completion and
pointer to the next instance.  That's what shrink_dcache_tree() will wait
on if needed.

* add a new member (->waiters, opaque pointer to struct completion_list)
to struct dentry.  It is defined for negative live dentries that are
not in-lookup ones and it will remain NULL for almost all of them.

It does not conflict with ->d_rcu (defined for killed dentries), ->d_alias
(defined for positive dentries, all live) or ->d_in_lookup_hash (defined
for in-lookup dentries, all live negative).  That allows to colocate
all four members.

* make sure that all places where dentry enters the state where ->waiters
is defined (live, negative, not-in-lookup) initialize ->waiters to NULL.

* if select_collect2() runs into a dentry that is already dying, have
its caller insert a local instance of struct completion_list into the
head of the list hanging off dentry->waiters and wait for completion.

* if dentry_unlist() sees non-NULL ->waiters, have it carefully walk
through the completion_list instances in that list, calling complete()
for each.

For now struct completion_list is local to fs/dcache.c; it's obviously
dentry-agnostic, and it can be trivially lifted into linux/completion.h
if somebody finds a reason to do so...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/dcache.c
include/linux/dcache.h

index 616a445ec7207afd1d66880f602a8992137eb9a7..0c8faeee02e22ce9a1ca59aee55aab002402a970 100644 (file)
@@ -456,6 +456,15 @@ static void dentry_unlink_inode(struct dentry * dentry)
        raw_write_seqcount_begin(&dentry->d_seq);
        __d_clear_type_and_inode(dentry);
        hlist_del_init(&dentry->d_alias);
+       /*
+        * dentry becomes negative, so the space occupied by ->d_alias
+        * belongs to ->waiters now; we could use __hlist_del() instead
+        * of hlist_del_init(), if not for the stunt pulled by nfs
+        * dummy root dentries - positive dentry *not* included into
+        * the alias list of its inode.  Open-coding hlist_del_init()
+        * and removing zeroing would be too clumsy...
+        */
+       dentry->waiters = NULL;
        raw_write_seqcount_end(&dentry->d_seq);
        spin_unlock(&dentry->d_lock);
        spin_unlock(&inode->i_lock);
@@ -605,6 +614,44 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
+struct completion_list {
+       struct completion_list *next;
+       struct completion completion;
+};
+
+/*
+ *  shrink_dcache_tree() needs to be notified when dentry in process of
+ *  being evicted finally gets unlisted.  Such dentries are
+ *     already with negative ->d_count
+ *     already negative
+ *     already not in in-lookup hash
+ *     reachable only via ->d_sib.
+ *
+ *  Use ->waiters for a single-linked list of struct completion_list of
+ *  waiters.
+ */
+static inline void d_add_waiter(struct dentry *dentry, struct completion_list *p)
+{
+       struct completion_list *v = dentry->waiters;
+       init_completion(&p->completion);
+       p->next = v;
+       dentry->waiters = p;
+}
+
+static inline void d_complete_waiters(struct dentry *dentry)
+{
+       struct completion_list *v = dentry->waiters;
+       if (unlikely(v)) {
+               /* some shrink_dcache_tree() instances are waiting */
+               dentry->waiters = NULL;
+               while (v) {
+                       struct completion *r = &v->completion;
+                       v = v->next;
+                       complete(r);
+               }
+       }
+}
+
 static inline void dentry_unlist(struct dentry *dentry)
 {
        struct dentry *next;
@@ -613,6 +660,7 @@ static inline void dentry_unlist(struct dentry *dentry)
         * attached to the dentry tree
         */
        dentry->d_flags |= DCACHE_DENTRY_KILLED;
+       d_complete_waiters(dentry);
        if (unlikely(hlist_unhashed(&dentry->d_sib)))
                return;
        __hlist_del(&dentry->d_sib);
@@ -1569,6 +1617,10 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
                        return D_WALK_QUIT;
                }
                to_shrink_list(dentry, &data->dispose);
+       } else if (dentry->d_lockref.count < 0) {
+               rcu_read_lock();
+               data->victim = dentry;
+               return D_WALK_QUIT;
        }
        /*
         * We can return to the caller if we have found some (this
@@ -1608,12 +1660,27 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
                data.victim = NULL;
                d_walk(parent, &data, select_collect2);
                if (data.victim) {
-                       spin_lock(&data.victim->d_lock);
-                       if (!lock_for_kill(data.victim)) {
-                               spin_unlock(&data.victim->d_lock);
+                       struct dentry *v = data.victim;
+
+                       spin_lock(&v->d_lock);
+                       if (v->d_lockref.count < 0 &&
+                           !(v->d_flags & DCACHE_DENTRY_KILLED)) {
+                               struct completion_list wait;
+                               // It's busy dying; have it notify us once
+                               // it becomes invisible to d_walk().
+                               d_add_waiter(v, &wait);
+                               spin_unlock(&v->d_lock);
+                               rcu_read_unlock();
+                               if (!list_empty(&data.dispose))
+                                       shrink_dentry_list(&data.dispose);
+                               wait_for_completion(&wait.completion);
+                               continue;
+                       }
+                       if (!lock_for_kill(v)) {
+                               spin_unlock(&v->d_lock);
                                rcu_read_unlock();
                        } else {
-                               shrink_kill(data.victim);
+                               shrink_kill(v);
                        }
                }
                if (!list_empty(&data.dispose))
@@ -1787,7 +1854,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
        INIT_HLIST_BL_NODE(&dentry->d_hash);
        INIT_LIST_HEAD(&dentry->d_lru);
        INIT_HLIST_HEAD(&dentry->d_children);
-       INIT_HLIST_NODE(&dentry->d_alias);
+       dentry->waiters = NULL;
        INIT_HLIST_NODE(&dentry->d_sib);
 
        if (dentry->d_op && dentry->d_op->d_init) {
@@ -2729,7 +2796,7 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
        d_wait = dentry->d_wait;
        dentry->d_wait = NULL;
        hlist_bl_unlock(b);
-       INIT_HLIST_NODE(&dentry->d_alias);
+       dentry->waiters = NULL;
        INIT_LIST_HEAD(&dentry->d_lru);
        return d_wait;
 }
index f939d2ed10a36cbe13c6a587ef334a6b6e32c978..19098253f2dd93b4a876a69e5a921d00c37c8253 100644 (file)
@@ -88,6 +88,7 @@ union shortname_store {
 
 #define d_lock d_lockref.lock
 #define d_iname d_shortname.string
+struct completion_list;
 
 struct dentry {
        /* RCU lookup touched fields */
@@ -122,12 +123,23 @@ struct dentry {
        struct hlist_node d_sib;        /* child of parent list */
        struct hlist_head d_children;   /* our children */
        /*
-        * d_alias and d_rcu can share memory
+        * the following members can share memory - their uses are
+        * mutually exclusive.
         */
        union {
-               struct hlist_node d_alias;      /* inode alias list */
-               struct hlist_bl_node d_in_lookup_hash;  /* only for in-lookup ones */
+               /* positives: inode alias list */
+               struct hlist_node d_alias;
+               /* in-lookup ones (all negative, live): hash chain */
+               struct hlist_bl_node d_in_lookup_hash;
+               /* killed ones: (already negative) used to schedule freeing */
                struct rcu_head d_rcu;
+               /*
+                * live non-in-lookup negatives: used if shrink_dcache_tree()
+                * races with eviction by another thread and needs to wait for
+                * this dentry to get killed .  Remains NULL for almost all
+                * negative dentries.
+                */
+               struct completion_list *waiters;
        };
 };