bcachefs: bch2_journal_write() refactoring
author     Kent Overstreet <kent.overstreet@linux.dev>
           Sat, 3 May 2025 23:14:54 +0000 (19:14 -0400)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Thu, 22 May 2025 00:14:45 +0000 (20:14 -0400)
Make the locking easier to follow; also take io_refs earlier, in
__journal_write_alloc().

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
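
The shape the diff below moves to: __journal_write_alloc() takes each device's write io_ref (via bch2_dev_get_ioref()) at the moment the device is considered, releases it immediately on every skip path, and the shared exit paths release whatever is still held. A minimal standalone C sketch of that acquire-early/release-on-skip pattern, using a toy device type and a plain atomic refcount rather than bcachefs's enumerated_ref machinery (all names below are hypothetical, not bcachefs APIs):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

/* Toy stand-ins for bch_dev and its write io_ref -- not bcachefs code. */
struct dev {
	atomic_int	io_ref;		/* held while a write may be issued */
	bool		rw;		/* device is writable */
	unsigned	sectors_free;
};

/* Try to take a ref; fails if the device is going away (ref dropped to 0). */
static bool dev_get_ioref(struct dev *d)
{
	int old = atomic_load(&d->io_ref);

	while (old > 0)
		if (atomic_compare_exchange_weak(&d->io_ref, &old, old + 1))
			return true;
	return false;
}

static void dev_put_ioref(struct dev *d)
{
	atomic_fetch_sub(&d->io_ref, 1);
}

/*
 * The acquire-early pattern: take the ref while a device is being
 * considered, drop it immediately on every skip path, and leave the refs
 * of the chosen devices held for the eventual I/O submission.
 */
static size_t pick_devices(struct dev *devs, size_t nr, unsigned sectors,
			   struct dev **picked, size_t want)
{
	size_t n = 0;

	for (size_t i = 0; i < nr && n < want; i++) {
		struct dev *d = &devs[i];

		if (!dev_get_ioref(d))
			continue;		/* device gone; nothing to drop */

		if (!d->rw || sectors > d->sectors_free) {
			dev_put_ioref(d);	/* skip path: release at once */
			continue;
		}

		picked[n++] = d;		/* ref stays held for the write */
	}
	return n;
}

/* Shared exit path: drop every ref still held, whether or not I/O ran. */
static void put_picked(struct dev **picked, size_t n)
{
	while (n--)
		dev_put_ioref(picked[n]);
}

int main(void)
{
	struct dev devs[3] = {
		{ .io_ref = 1, .rw = true,  .sectors_free = 128 },
		{ .io_ref = 1, .rw = false, .sectors_free = 128 },	/* skipped: read-only */
		{ .io_ref = 1, .rw = true,  .sectors_free = 8 },	/* skipped: too full */
	};
	struct dev *picked[3];

	size_t n = pick_devices(devs, 3, 64, picked, 2);
	/* ... writes would be submitted against 'picked' here ... */
	put_picked(picked, n);
	return 0;
}

Holding the refs from allocation onward is why, in the diff, journal_write_submit() can switch to bch2_dev_have_ref() instead of re-acquiring refs, and why the err/no_io exits in bch2_journal_write() now walk the key's pointers calling enumerated_ref_put() for each device.
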
fs/bcachefs/journal_io.c

index be86fd21de2a65325c63fb03ac92d2dfbf4d45f3..c593d77dc8f275e679b49c36889a5b5bc633d693 100644
@@ -1467,6 +1467,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
 
+       rcu_read_lock();
        darray_for_each(*devs, i) {
                struct bch_dev *ca = rcu_dereference(c->devs[*i]);
                if (!ca)
@@ -1488,6 +1489,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
                        ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq);
                }
        }
+       rcu_read_unlock();
 }
 
 static void __journal_write_alloc(struct journal *j,
@@ -1500,7 +1502,8 @@ static void __journal_write_alloc(struct journal *j,
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
 
        darray_for_each(*devs, i) {
-               struct bch_dev *ca = rcu_dereference(c->devs[*i]);
+               struct bch_dev *ca = bch2_dev_get_ioref(c, *i, WRITE,
+                                       BCH_DEV_WRITE_REF_journal_write);
                if (!ca)
                        continue;
 
@@ -1514,8 +1517,10 @@ static void __journal_write_alloc(struct journal *j,
                    ca->mi.state != BCH_MEMBER_STATE_rw ||
                    !ja->nr ||
                    bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
-                   sectors > ja->sectors_free)
+                   sectors > ja->sectors_free) {
+                       enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
                        continue;
+               }
 
                bch2_dev_stripe_increment(ca, &j->wp.stripe);
 
@@ -1538,15 +1543,8 @@ static void __journal_write_alloc(struct journal *j,
        }
 }
 
-/**
- * journal_write_alloc - decide where to write next journal entry
- *
- * @j:         journal object
- * @w:         journal buf (entry to be written)
- *
- * Returns: 0 on success, or -BCH_ERR_insufficient_devices on failure
- */
-static int journal_write_alloc(struct journal *j, struct journal_buf *w)
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+                              unsigned *replicas)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_devs_mask devs;
@@ -1554,29 +1552,18 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
        unsigned sectors = vstruct_sectors(w->data, c->block_bits);
        unsigned target = c->opts.metadata_target ?:
                c->opts.foreground_target;
-       unsigned replicas = 0, replicas_want =
-               READ_ONCE(c->opts.metadata_replicas);
+       unsigned replicas_want = READ_ONCE(c->opts.metadata_replicas);
        unsigned replicas_need = min_t(unsigned, replicas_want,
                                       READ_ONCE(c->opts.metadata_replicas_required));
        bool advance_done = false;
 
-       rcu_read_lock();
-
-       /* We might run more than once if we have to stop and do discards: */
-       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key));
-       bkey_for_each_ptr(ptrs, p) {
-               struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev);
-               if (ca)
-                       replicas += ca->mi.durability;
-       }
-
 retry_target:
        devs = target_rw_devs(c, BCH_DATA_journal, target);
        devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
 retry_alloc:
-       __journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want);
+       __journal_write_alloc(j, w, &devs_sorted, sectors, replicas, replicas_want);
 
-       if (likely(replicas >= replicas_want))
+       if (likely(*replicas >= replicas_want))
                goto done;
 
        if (!advance_done) {
@@ -1585,18 +1572,16 @@ retry_alloc:
                goto retry_alloc;
        }
 
-       if (replicas < replicas_want && target) {
+       if (*replicas < replicas_want && target) {
                /* Retry from all devices: */
                target = 0;
                advance_done = false;
                goto retry_target;
        }
 done:
-       rcu_read_unlock();
-
        BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
 
-       return replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
+       return *replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
 }
 
 static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
@@ -1782,13 +1767,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
        unsigned sectors = vstruct_sectors(w->data, c->block_bits);
 
        extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
-               struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE,
-                                       BCH_DEV_WRITE_REF_journal_write);
-               if (!ca) {
-                       /* XXX: fix this */
-                       bch_err(c, "missing device %u for journal write", ptr->dev);
-                       continue;
-               }
+               struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
 
                this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
                             sectors);
@@ -2074,7 +2053,8 @@ CLOSURE_CALLBACK(bch2_journal_write)
 
        ret = bch2_journal_write_pick_flush(j, w);
        spin_unlock(&j->lock);
-       if (ret)
+
+       if (unlikely(ret))
                goto err;
 
        mutex_lock(&j->buf_lock);
@@ -2082,43 +2062,30 @@ CLOSURE_CALLBACK(bch2_journal_write)
 
        ret = bch2_journal_write_prep(j, w);
        mutex_unlock(&j->buf_lock);
-       if (ret)
-               goto err;
 
-       j->entry_bytes_written += vstruct_bytes(w->data);
+       if (unlikely(ret))
+               goto err;
 
+       unsigned replicas_allocated = 0;
        while (1) {
-               spin_lock(&j->lock);
-               ret = journal_write_alloc(j, w);
+               ret = journal_write_alloc(j, w, &replicas_allocated);
                if (!ret || !j->can_discard)
                        break;
 
-               spin_unlock(&j->lock);
                bch2_journal_do_discards(j);
        }
 
-       if (ret && !bch2_journal_error(j)) {
-               struct printbuf buf = PRINTBUF;
-               buf.atomic++;
-
-               __bch2_journal_debug_to_text(&buf, j);
-               spin_unlock(&j->lock);
-               prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
-                                         le64_to_cpu(w->data->seq),
-                                         vstruct_sectors(w->data, c->block_bits),
-                                         bch2_err_str(ret));
-               bch2_print_str(c, KERN_ERR, buf.buf);
-               printbuf_exit(&buf);
-       }
-       if (ret)
-               goto err;
+       if (unlikely(ret))
+               goto err_allocate_write;
 
+       spin_lock(&j->lock);
        /*
         * write is allocated, no longer need to account for it in
         * bch2_journal_space_available():
         */
        w->sectors = 0;
        w->write_allocated = true;
+       j->entry_bytes_written += vstruct_bytes(w->data);
 
        /*
         * journal entry has been compacted and allocated, recalculate space
@@ -2130,9 +2097,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
 
        w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
 
-       if (c->opts.nochanges)
-               goto no_io;
-
        /*
         * Mark journal replicas before we submit the write to guarantee
         * recovery will find the journal entries after a crash.
@@ -2143,15 +2107,33 @@ CLOSURE_CALLBACK(bch2_journal_write)
        if (ret)
                goto err;
 
+       if (c->opts.nochanges)
+               goto no_io;
+
        if (!JSET_NO_FLUSH(w->data))
                continue_at(cl, journal_write_preflush, j->wq);
        else
                continue_at(cl, journal_write_submit, j->wq);
        return;
-no_io:
-       continue_at(cl, journal_write_done, j->wq);
-       return;
+err_allocate_write:
+       if (!bch2_journal_error(j)) {
+               struct printbuf buf = PRINTBUF;
+
+               bch2_journal_debug_to_text(&buf, j);
+               prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
+                                         le64_to_cpu(w->data->seq),
+                                         vstruct_sectors(w->data, c->block_bits),
+                                         bch2_err_str(ret));
+               bch2_print_str(c, KERN_ERR, buf.buf);
+               printbuf_exit(&buf);
+       }
 err:
        bch2_fatal_error(c);
+no_io:
+       extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
+               struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
+               enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
+       }
+
        continue_at(cl, journal_write_done, j->wq);
 }