git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
Document rcu_read_lock() use in select_collect2()
author Al Viro <viro@zeniv.linux.org.uk>
Sat, 11 Apr 2026 08:01:28 +0000 (04:01 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Fri, 5 Jun 2026 04:34:55 +0000 (00:34 -0400)
If select_collect2() finds something that is neither busy nor can
be moved to shrink list, it needs to return that to caller's caller
(shrink_dcache_tree()) ASAP and do so without grabbing references (among
other things, it might be already dying, in which case refcount can't be
incremented).  We are called inside a ->d_lock scope, but that scope is
going to be terminated as soon as we return to caller (d_walk()); ->d_lock
will be retaken by shrink_dcache_tree(), but we need to bridge between
these scopes, turning them into a contiguous RCU read-side critical area.

We do that with rcu_read_lock() scope - it spans from unbalanced
rcu_read_lock() in select_collect2() to unbalanced rcu_read_unlock()
in shrink_dcache_tree().  That works, but it really needs to be documented;
it's rather unidiomatic and it had caused quite a bit of confusion - some
of it in form of patches "fixing" the damn thing.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/dcache.c

index ae9b7151e6a4caef6b4abdcf17700007315ae765..a65cb6451e630970074832ca27ec0decef3fbc28 100644 (file)
@@ -1638,6 +1638,15 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
 
        if (dentry->d_lockref.count <= 0) {
                if (!__move_to_shrink_list(dentry, &data->dispose)) {
+                       /*
+                        * We need to enter an RCU read-side critical area that
+                        * would extend past the return from d_walk() and
+                        * we are in the scope of ->d_lock that will terminate
+                        * before that, so we use rcu_read_lock() to bridge
+                        * over to the scope of ->d_lock in d_walk() caller.
+                        * The scope of rcu_read_lock() spans from here to
+                        * paired rcu_read_unlock() in shrink_dcache_tree().
+                        */
                        rcu_read_lock();
                        data->victim = dentry;
                        return D_WALK_QUIT;
@@ -1682,9 +1691,20 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
                d_walk(parent, &data, select_collect2);
                if (data.victim) {
                        struct dentry *v = data.victim;
-
+                       /*
+                        * select_collect2() has picked a dentry that was
+                        * either dying or on a shrink list and arranged
+                        * for it to be returned to us.  We are still in
+                        * the RCU read-side critical area started there
+                        * (rcu_read_lock() scope opened in select_collect2()),
+                        * so dentry couldn't have been freed yet, but its
+                        * state might've changed since we dropped ->d_lock
+                        * on the way out.  Switch over to ->d_lock scope
+                        * and recheck the dentry state.
+                        */
                        spin_lock(&v->d_lock);
                        rcu_read_unlock();
+
                        if (v->d_lockref.count < 0 &&
                            !(v->d_flags & DCACHE_DENTRY_KILLED)) {
                                struct completion_list wait;