bcachefs: Fix race between trans_put() and btree_transactions_read()
author		Kent Overstreet <kent.overstreet@linux.dev>
		Sun, 23 Jun 2024 02:02:09 +0000 (22:02 -0400)
committer	Kent Overstreet <kent.overstreet@linux.dev>
		Sun, 23 Jun 2024 04:57:21 +0000 (00:57 -0400)
debug.c was using closure_get() on a different thread's closure, where
we don't know if the object being refcounted is alive.

We keep btree_trans objects on a list so they can be printed by debug
code, and because it is cost prohibitive to touch the btree_trans list
every time we allocate and free btree_trans objects, cached objects are
also on this list.

However, we do not want the debug code to see cached but not in use
btree_trans objects - critically because the btree_paths array will have
been freed (if it was reallocated).
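
Concretely, the pre-fix debug-side loop looked roughly like this (a
simplified sketch - the real loop in fs/bcachefs/debug.c also flushes
its output buffer and tracks how far it has iterated):

  seqmutex_lock(&c->btree_trans_lock);
  list_for_each_entry(trans, &c->btree_trans_list, list) {
          /*
           * Racy: nothing guarantees trans->ref is nonzero here, or
           * that trans isn't a cached entry whose btree_paths array
           * has already been freed:
           */
          closure_get(&trans->ref);
          u32 seq = seqmutex_unlock(&c->btree_trans_lock);

          bch2_btree_trans_to_text(&i->buf, trans);

          closure_put(&trans->ref);
          /* ... re-take btree_trans_lock and continue ... */
  }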

closure_get() is also incorrect to use when the get may race with the
ref hitting zero: to use it we must already have a ref on the object,
or otherwise know the ref can't currently hit 0 (as in the cycle
detector).

To fix this, use the previously introduced closure_get_not_zero(),
closure_return_sync(), and closure_init_stack_release(); the debug code
can now only take a ref on a trans object if it's alive and in use.
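
With those, the debug-side pattern becomes roughly the following (a
minimal sketch using the closure helpers named above and the bch_fs
list/lock fields; the real loop in fs/bcachefs/debug.c also handles
output buffering and remembers how far it has iterated):

  restart:
          seqmutex_lock(&c->btree_trans_lock);
          list_for_each_entry(trans, &c->btree_trans_list, list) {
                  /*
                   * Only take a ref if the count is nonzero, i.e. the
                   * trans is initialized and in use; cached-but-unused
                   * entries are skipped.
                   */
                  if (!closure_get_not_zero(&trans->ref))
                          continue;

                  u32 seq = seqmutex_unlock(&c->btree_trans_lock);

                  bch2_btree_trans_to_text(&i->buf, trans);

                  closure_put(&trans->ref);

                  if (!seqmutex_relock(&c->btree_trans_lock, seq))
                          goto restart;
          }
          seqmutex_unlock(&c->btree_trans_lock);

On the trans side, closure_init_stack_release() publishes the initial
ref only once the trans is fully set up, and closure_return_sync() in
bch2_trans_put() drops it and waits for any concurrent holders before
trans->paths can be freed or the trans cached for reuse.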

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_iter.c
fs/bcachefs/debug.c

index 3a1419d1788856b5dfc6a792577d04ef48707471..15c1c7cfefe604b83a7510ba8ae836bfdc96702f 100644 (file)
@@ -3130,7 +3130,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
 
        trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
        memset(trans, 0, sizeof(*trans));
-       closure_init_stack(&trans->ref);
 
        seqmutex_lock(&c->btree_trans_lock);
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
@@ -3161,7 +3160,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
 list_add_done:
        seqmutex_unlock(&c->btree_trans_lock);
 got_trans:
-       trans->ref.closure_get_happened = false;
        trans->c                = c;
        trans->last_begin_time  = local_clock();
        trans->fn_idx           = fn_idx;
@@ -3200,6 +3198,8 @@ got_trans:
        trans->srcu_idx         = srcu_read_lock(&c->btree_trans_barrier);
        trans->srcu_lock_time   = jiffies;
        trans->srcu_held        = true;
+
+       closure_init_stack_release(&trans->ref);
        return trans;
 }
 
@@ -3257,10 +3257,10 @@ void bch2_trans_put(struct btree_trans *trans)
                bch2_journal_keys_put(c);
 
        /*
-        * trans->ref protects trans->locking_wait.task, btree_paths arary; used
+        * trans->ref protects trans->locking_wait.task, btree_paths array; used
         * by cycle detector
         */
-       closure_sync(&trans->ref);
+       closure_return_sync(&trans->ref);
        trans->locking_wait.task = NULL;
 
        unsigned long *paths_allocated = trans->paths_allocated;
@@ -3385,8 +3385,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
                                per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
 
                        if (trans) {
-                               closure_sync(&trans->ref);
-
                                seqmutex_lock(&c->btree_trans_lock);
                                list_del(&trans->list);
                                seqmutex_unlock(&c->btree_trans_lock);
index ecfdb21ebade9fa9b33726e516341595791ebfb4..61c50522abb955c337b732ff233ffec45b54553d 100644 (file)
@@ -587,14 +587,10 @@ restart:
                if (!task || task->pid <= i->iter)
                        continue;
 
-               closure_get(&trans->ref);
-               u32 seq = seqmutex_unlock(&c->btree_trans_lock);
+               if (!closure_get_not_zero(&trans->ref))
+                       continue;
 
-               ret = flush_buf(i);
-               if (ret) {
-                       closure_put(&trans->ref);
-                       goto unlocked;
-               }
+               u32 seq = seqmutex_unlock(&c->btree_trans_lock);
 
                bch2_btree_trans_to_text(&i->buf, trans);
 
@@ -604,10 +600,12 @@ restart:
                printbuf_indent_sub(&i->buf, 2);
                prt_newline(&i->buf);
 
-               i->iter = task->pid;
-
                closure_put(&trans->ref);
 
+               ret = flush_buf(i);
+               if (ret)
+                       goto unlocked;
+
                if (!seqmutex_relock(&c->btree_trans_lock, seq))
                        goto restart;
        }
@@ -817,7 +815,8 @@ restart:
 
                iter = task->pid;
 
-               closure_get(&trans->ref);
+               if (!closure_get_not_zero(&trans->ref))
+                       continue;
 
                u32 seq = seqmutex_unlock(&c->btree_trans_lock);