bcachefs: bch2_journal_write() refactoring
author     Kent Overstreet <kent.overstreet@linux.dev>
           Sat, 3 May 2025 23:14:54 +0000 (19:14 -0400)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Thu, 22 May 2025 00:14:45 +0000 (20:14 -0400)
Make the locking easier to follow; also take io_refs earlier, in
__journal_write_alloc().

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
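
The shape the diff below moves to: __journal_write_alloc() takes each device's write io_ref (via bch2_dev_get_ioref()) at the moment the device is considered, releases it immediately on every skip path, and the shared exit paths release whatever is still held. A minimal standalone C sketch of that acquire-early/release-on-skip pattern, using a toy device type and a plain atomic refcount rather than bcachefs's enumerated_ref machinery (all names below are hypothetical, not bcachefs APIs):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

/* Toy stand-ins for bch_dev and its write io_ref -- not bcachefs code. */
struct dev {
	atomic_int	io_ref;		/* held while a write may be issued */
	bool		rw;		/* device is writable */
	unsigned	sectors_free;
};

/* Try to take a ref; fails if the device is going away (ref dropped to 0). */
static bool dev_get_ioref(struct dev *d)
{
	int old = atomic_load(&d->io_ref);

	while (old > 0)
		if (atomic_compare_exchange_weak(&d->io_ref, &old, old + 1))
			return true;
	return false;
}

static void dev_put_ioref(struct dev *d)
{
	atomic_fetch_sub(&d->io_ref, 1);
}

/*
 * The acquire-early pattern: take the ref while a device is being
 * considered, drop it immediately on every skip path, and leave the refs
 * of the chosen devices held for the eventual I/O submission.
 */
static size_t pick_devices(struct dev *devs, size_t nr, unsigned sectors,
			   struct dev **picked, size_t want)
{
	size_t n = 0;

	for (size_t i = 0; i < nr && n < want; i++) {
		struct dev *d = &devs[i];

		if (!dev_get_ioref(d))
			continue;		/* device gone; nothing to drop */

		if (!d->rw || sectors > d->sectors_free) {
			dev_put_ioref(d);	/* skip path: release at once */
			continue;
		}

		picked[n++] = d;		/* ref stays held for the write */
	}
	return n;
}

/* Shared exit path: drop every ref still held, whether or not I/O ran. */
static void put_picked(struct dev **picked, size_t n)
{
	while (n--)
		dev_put_ioref(picked[n]);
}

int main(void)
{
	struct dev devs[3] = {
		{ .io_ref = 1, .rw = true,  .sectors_free = 128 },
		{ .io_ref = 1, .rw = false, .sectors_free = 128 },	/* skipped: read-only */
		{ .io_ref = 1, .rw = true,  .sectors_free = 8 },	/* skipped: too full */
	};
	struct dev *picked[3];

	size_t n = pick_devices(devs, 3, 64, picked, 2);
	/* ... writes would be submitted against 'picked' here ... */
	put_picked(picked, n);
	return 0;
}

Holding the refs from allocation onward is why, in the diff, journal_write_submit() can switch to bch2_dev_have_ref() instead of re-acquiring refs, and why the err/no_io exits in bch2_journal_write() now walk the key's pointers calling enumerated_ref_put() for each device.
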
fs/bcachefs/journal_io.c

index be86fd21de2a65325c63fb03ac92d2dfbf4d45f3..c593d77dc8f275e679b49c36889a5b5bc633d693 100644
@@ -1467,6 +1467,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
 
+       rcu_read_lock();
        darray_for_each(*devs, i) {
                struct bch_dev *ca = rcu_dereference(c->devs[*i]);
                if (!ca)
@@ -1488,6 +1489,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
                        ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq);
                }
        }
+       rcu_read_unlock();
 }
 
 static void __journal_write_alloc(struct journal *j,
@@ -1500,7 +1502,8 @@ static void __journal_write_alloc(struct journal *j,
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
 
        darray_for_each(*devs, i) {
-               struct bch_dev *ca = rcu_dereference(c->devs[*i]);
+               struct bch_dev *ca = bch2_dev_get_ioref(c, *i, WRITE,
+                                       BCH_DEV_WRITE_REF_journal_write);
                if (!ca)
                        continue;
 
@@ -1514,8 +1517,10 @@ static void __journal_write_alloc(struct journal *j,
                    ca->mi.state != BCH_MEMBER_STATE_rw ||
                    !ja->nr ||
                    bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
-                   sectors > ja->sectors_free)
+                   sectors > ja->sectors_free) {
+                       enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
                        continue;
+               }
 
                bch2_dev_stripe_increment(ca, &j->wp.stripe);
 
@@ -1538,15 +1543,8 @@ static void __journal_write_alloc(struct journal *j,
        }
 }
 
-/**
- * journal_write_alloc - decide where to write next journal entry
- *
- * @j:         journal object
- * @w:         journal buf (entry to be written)
- *
- * Returns: 0 on success, or -BCH_ERR_insufficient_devices on failure
- */
-static int journal_write_alloc(struct journal *j, struct journal_buf *w)
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+                              unsigned *replicas)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_devs_mask devs;
@@ -1554,29 +1552,18 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
        unsigned sectors = vstruct_sectors(w->data, c->block_bits);
        unsigned target = c->opts.metadata_target ?:
                c->opts.foreground_target;
-       unsigned replicas = 0, replicas_want =
-               READ_ONCE(c->opts.metadata_replicas);
+       unsigned replicas_want = READ_ONCE(c->opts.metadata_replicas);
        unsigned replicas_need = min_t(unsigned, replicas_want,
                                       READ_ONCE(c->opts.metadata_replicas_required));
        bool advance_done = false;
 
-       rcu_read_lock();
-
-       /* We might run more than once if we have to stop and do discards: */
-       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key));
-       bkey_for_each_ptr(ptrs, p) {
-               struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev);
-               if (ca)
-                       replicas += ca->mi.durability;
-       }
-
 retry_target:
        devs = target_rw_devs(c, BCH_DATA_journal, target);
        devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
 retry_alloc:
-       __journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want);
+       __journal_write_alloc(j, w, &devs_sorted, sectors, replicas, replicas_want);
 
-       if (likely(replicas >= replicas_want))
+       if (likely(*replicas >= replicas_want))
                goto done;
 
        if (!advance_done) {
@@ -1585,18 +1572,16 @@ retry_alloc:
                goto retry_alloc;
        }
 
-       if (replicas < replicas_want && target) {
+       if (*replicas < replicas_want && target) {
                /* Retry from all devices: */
                target = 0;
                advance_done = false;
                goto retry_target;
        }
 done:
-       rcu_read_unlock();
-
        BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
 
-       return replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
+       return *replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
 }
 
 static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
@@ -1782,13 +1767,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
        unsigned sectors = vstruct_sectors(w->data, c->block_bits);
 
        extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
-               struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE,
-                                       BCH_DEV_WRITE_REF_journal_write);
-               if (!ca) {
-                       /* XXX: fix this */
-                       bch_err(c, "missing device %u for journal write", ptr->dev);
-                       continue;
-               }
+               struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
 
                this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
                             sectors);
@@ -2074,7 +2053,8 @@ CLOSURE_CALLBACK(bch2_journal_write)
 
        ret = bch2_journal_write_pick_flush(j, w);
        spin_unlock(&j->lock);
-       if (ret)
+
+       if (unlikely(ret))
                goto err;
 
        mutex_lock(&j->buf_lock);
@@ -2082,43 +2062,30 @@ CLOSURE_CALLBACK(bch2_journal_write)
 
        ret = bch2_journal_write_prep(j, w);
        mutex_unlock(&j->buf_lock);
-       if (ret)
-               goto err;
 
-       j->entry_bytes_written += vstruct_bytes(w->data);
+       if (unlikely(ret))
+               goto err;
 
+       unsigned replicas_allocated = 0;
        while (1) {
-               spin_lock(&j->lock);
-               ret = journal_write_alloc(j, w);
+               ret = journal_write_alloc(j, w, &replicas_allocated);
                if (!ret || !j->can_discard)
                        break;
 
-               spin_unlock(&j->lock);
                bch2_journal_do_discards(j);
        }
 
-       if (ret && !bch2_journal_error(j)) {
-               struct printbuf buf = PRINTBUF;
-               buf.atomic++;
-
-               __bch2_journal_debug_to_text(&buf, j);
-               spin_unlock(&j->lock);
-               prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
-                                         le64_to_cpu(w->data->seq),
-                                         vstruct_sectors(w->data, c->block_bits),
-                                         bch2_err_str(ret));
-               bch2_print_str(c, KERN_ERR, buf.buf);
-               printbuf_exit(&buf);
-       }
-       if (ret)
-               goto err;
+       if (unlikely(ret))
+               goto err_allocate_write;
 
+       spin_lock(&j->lock);
        /*
         * write is allocated, no longer need to account for it in
         * bch2_journal_space_available():
         */
        w->sectors = 0;
        w->write_allocated = true;
+       j->entry_bytes_written += vstruct_bytes(w->data);
 
        /*
         * journal entry has been compacted and allocated, recalculate space
@@ -2130,9 +2097,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
 
        w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
 
-       if (c->opts.nochanges)
-               goto no_io;
-
        /*
         * Mark journal replicas before we submit the write to guarantee
         * recovery will find the journal entries after a crash.
@@ -2143,15 +2107,33 @@ CLOSURE_CALLBACK(bch2_journal_write)
        if (ret)
                goto err;
 
+       if (c->opts.nochanges)
+               goto no_io;
+
        if (!JSET_NO_FLUSH(w->data))
                continue_at(cl, journal_write_preflush, j->wq);
        else
                continue_at(cl, journal_write_submit, j->wq);
        return;
-no_io:
-       continue_at(cl, journal_write_done, j->wq);
-       return;
+err_allocate_write:
+       if (!bch2_journal_error(j)) {
+               struct printbuf buf = PRINTBUF;
+
+               bch2_journal_debug_to_text(&buf, j);
+               prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
+                                         le64_to_cpu(w->data->seq),
+                                         vstruct_sectors(w->data, c->block_bits),
+                                         bch2_err_str(ret));
+               bch2_print_str(c, KERN_ERR, buf.buf);
+               printbuf_exit(&buf);
+       }
 err:
        bch2_fatal_error(c);
+no_io:
+       extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
+               struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
+               enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
+       }
+
        continue_at(cl, journal_write_done, j->wq);
 }