git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: Free journal bufs when not in use
author Kent Overstreet <kent.overstreet@linux.dev>
Thu, 6 Feb 2025 00:13:39 +0000 (19:13 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sat, 15 Mar 2025 01:02:13 +0000 (21:02 -0400)
Since we're increasing the number of 'struct journal_bufs', we don't
want them all permanently holding onto buffers for the journal data -
that'd be 16 * 2MB = 32MB, or potentially more.

Add a single-element mempool (open coded, since buffer size varies);
this also means we won't be hitting the memory allocator every time we
open and close a journal entry/buffer.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/journal.c
fs/bcachefs/journal_io.c
fs/bcachefs/journal_types.h

index 26886513e2d29ebc6c4d10977f9aa3b9111b1148..d47a4dfa03e3b848b4cfb4f8b0c72e087d677f97 100644 (file)
@@ -58,9 +58,11 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
 
        prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i));
 
-       prt_printf(out, "size:\t");
-       prt_human_readable_u64(out, vstruct_bytes(buf->data));
-       prt_newline(out);
+       if (buf->data) {
+               prt_printf(out, "size:\t");
+               prt_human_readable_u64(out, vstruct_bytes(buf->data));
+               prt_newline(out);
+       }
 
        prt_printf(out, "expires:\t");
        prt_printf(out, "%li jiffies\n", buf->expires - jiffies);
@@ -87,6 +89,9 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
 
 static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
 {
+       lockdep_assert_held(&j->lock);
+       out->atomic++;
+
        if (!out->nr_tabstops)
                printbuf_tabstop_push(out, 24);
 
@@ -95,6 +100,8 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
             seq++)
                bch2_journal_buf_to_text(out, j, seq);
        prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed");
+
+       --out->atomic;
 }
 
 static inline struct journal_buf *
@@ -104,10 +111,8 @@ journal_seq_to_buf(struct journal *j, u64 seq)
 
        EBUG_ON(seq > journal_cur_seq(j));
 
-       if (journal_seq_unwritten(j, seq)) {
+       if (journal_seq_unwritten(j, seq))
                buf = j->buf + (seq & JOURNAL_BUF_MASK);
-               EBUG_ON(le64_to_cpu(buf->data->seq) != seq);
-       }
        return buf;
 }
 
@@ -398,8 +403,16 @@ static int journal_entry_open(struct journal *j)
                return JOURNAL_ERR_insufficient_devices; /* -EROFS */
        }
 
+       if (!j->free_buf && !buf->data)
+               return JOURNAL_ERR_enomem; /* will retry after write completion frees up a buf */
+
        BUG_ON(!j->cur_entry_sectors);
 
+       if (!buf->data) {
+               swap(buf->data,         j->free_buf);
+               swap(buf->buf_size,     j->free_buf_size);
+       }
+
        buf->expires            =
                (journal_cur_seq(j) == j->flushed_seq_ondisk
                 ? jiffies
@@ -514,6 +527,33 @@ static void journal_write_work(struct work_struct *work)
        spin_unlock(&j->lock);
 }
 
+static void journal_buf_prealloc(struct journal *j)
+{
+       if (j->free_buf &&
+           j->free_buf_size >= j->buf_size_want)
+               return;
+
+       unsigned buf_size = j->buf_size_want;
+
+       spin_unlock(&j->lock);
+       void *buf = kvmalloc(buf_size, GFP_NOFS);
+       spin_lock(&j->lock);
+
+       if (buf &&
+           (!j->free_buf ||
+            buf_size > j->free_buf_size)) {
+               swap(buf,       j->free_buf);
+               swap(buf_size,  j->free_buf_size);
+       }
+
+       if (unlikely(buf)) {
+               spin_unlock(&j->lock);
+               /* kvfree can sleep */
+               kvfree(buf);
+               spin_lock(&j->lock);
+       }
+}
+
 static int __journal_res_get(struct journal *j, struct journal_res *res,
                             unsigned flags)
 {
@@ -544,6 +584,8 @@ retry:
 
        spin_lock(&j->lock);
 
+       journal_buf_prealloc(j);
+
        /*
         * Recheck after taking the lock, so we don't race with another thread
         * that just did journal_entry_open() and call bch2_journal_entry_close()
@@ -571,20 +613,26 @@ unlock:
        can_discard = j->can_discard;
        spin_unlock(&j->lock);
 out:
+       if (likely(!ret))
+               return 0;
        if (ret == JOURNAL_ERR_retry)
                goto retry;
-       if (!ret)
-               return 0;
 
        if (journal_error_check_stuck(j, ret, flags))
                ret = -BCH_ERR_journal_res_get_blocked;
 
        if (ret == JOURNAL_ERR_max_in_flight &&
-           track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true)) {
-
+           track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) &&
+           trace_journal_entry_full_enabled()) {
                struct printbuf buf = PRINTBUF;
+
+               bch2_printbuf_make_room(&buf, 4096);
+
+               spin_lock(&j->lock);
                prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
                bch2_journal_bufs_to_text(&buf, j);
+               spin_unlock(&j->lock);
+
                trace_journal_entry_full(c, buf.buf);
                printbuf_exit(&buf);
                count_event(c, journal_entry_full);
@@ -951,7 +999,8 @@ static void __bch2_journal_block(struct journal *j)
                        new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL;
                } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v));
 
-               journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset);
+               if (old.cur_entry_offset < JOURNAL_ENTRY_BLOCKED_VAL)
+                       journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset);
        }
 }
 
@@ -1481,6 +1530,7 @@ void bch2_fs_journal_exit(struct journal *j)
 
        for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
                kvfree(j->buf[i].data);
+       kvfree(j->free_buf);
        free_fifo(&j->pin);
 }
 
@@ -1507,13 +1557,13 @@ int bch2_fs_journal_init(struct journal *j)
        if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
                return -BCH_ERR_ENOMEM_journal_pin_fifo;
 
-       for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) {
-               j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
-               j->buf[i].data = kvmalloc(j->buf[i].buf_size, GFP_KERNEL);
-               if (!j->buf[i].data)
-                       return -BCH_ERR_ENOMEM_journal_buf;
+       j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN;
+       j->free_buf = kvmalloc(j->free_buf_size, GFP_KERNEL);
+       if (!j->free_buf)
+               return -BCH_ERR_ENOMEM_journal_buf;
+
+       for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
                j->buf[i].idx = i;
-       }
 
        j->pin.front = j->pin.back = 1;
 
@@ -1563,6 +1613,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        prt_printf(out, "average write size:\t");
        prt_human_readable_u64(out, nr_writes ? div64_u64(j->entry_bytes_written, nr_writes) : 0);
        prt_newline(out);
+       prt_printf(out, "free buf:\t%u\n",                      j->free_buf ? j->free_buf_size : 0);
        prt_printf(out, "nr direct reclaim:\t%llu\n",           j->nr_direct_reclaim);
        prt_printf(out, "nr background reclaim:\t%llu\n",       j->nr_background_reclaim);
        prt_printf(out, "reclaim kicked:\t%u\n",                j->reclaim_kicked);
index f2ff28e6697c0b4652087be48f40b52b080cb250..61f71e7baff2dfb0f26b9c2e7fe20bf140dc15e4 100644 (file)
@@ -1640,6 +1640,21 @@ static CLOSURE_CALLBACK(journal_write_done)
                j->err_seq      = seq;
        w->write_done = true;
 
+       if (!j->free_buf || j->free_buf_size < w->buf_size) {
+               swap(j->free_buf,       w->data);
+               swap(j->free_buf_size,  w->buf_size);
+       }
+
+       if (w->data) {
+               void *buf = w->data;
+               w->data = NULL;
+               w->buf_size = 0;
+
+               spin_unlock(&j->lock);
+               kvfree(buf);
+               spin_lock(&j->lock);
+       }
+
        bool completed = false;
 
        for (seq = journal_last_unwritten_seq(j);
@@ -1649,7 +1664,7 @@ static CLOSURE_CALLBACK(journal_write_done)
                if (!w->write_done)
                        break;
 
-               if (!j->err_seq && !JSET_NO_FLUSH(w->data)) {
+               if (!j->err_seq && !w->noflush) {
                        j->flushed_seq_ondisk = seq;
                        j->last_seq_ondisk = w->last_seq;
 
index 43cd2a7e0f7f06365fee0799579b666639bb53d6..ee9cb17c3ccff5cb6442d7223840531a888cdd31 100644 (file)
@@ -156,6 +156,7 @@ enum journal_flags {
        x(journal_full)                 \
        x(journal_pin_full)             \
        x(journal_stuck)                \
+       x(enomem)                       \
        x(insufficient_devices)
 
 enum journal_errors {
@@ -218,6 +219,8 @@ struct journal {
         * other is possibly being written out.
         */
        struct journal_buf      buf[JOURNAL_BUF_NR];
+       void                    *free_buf;
+       unsigned                free_buf_size;
 
        spinlock_t              lock;