#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
+#include "extent_io.h"
#include "relocation.h"
#include "file-item.h"
btrfs_tree_unlock(buf);
free_extent_buffer_stale(buf);
btrfs_mark_buffer_dirty(trans, cow);
+
+ btrfs_inhibit_eb_writeback(trans, cow);
+
*cow_ret = cow;
return 0;
return ret;
}
-static inline bool should_cow_block(const struct btrfs_trans_handle *trans,
+static inline bool should_cow_block(struct btrfs_trans_handle *trans,
const struct btrfs_root *root,
- const struct extent_buffer *buf)
+ struct extent_buffer *buf)
{
if (btrfs_is_testing(root->fs_info))
return false;
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
return true;
+ btrfs_inhibit_eb_writeback(trans, buf);
return false;
}
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/fsverity.h>
+#include <linux/lockdep.h>
#include "extent_io.h"
#include "extent-io-tree.h"
#include "extent_map.h"
* of time.
*/
spin_lock(&eb->refs_lock);
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+ if ((wbc->sync_mode == WB_SYNC_ALL ||
+ atomic_read(&eb->writeback_inhibitors) == 0) &&
+ test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
XA_STATE(xas, &fs_info->buffer_tree, eb->start >> fs_info->nodesize_bits);
unsigned long flags;
kmem_cache_free(extent_buffer_cache, eb);
}
+/*
+ * Record an extent buffer as writeback-inhibited for a transaction handle.
+ *
+ * @trans: transaction handle whose writeback_inhibited_ebs xarray tracks @eb
+ * @eb:    extent buffer to inhibit, must be locked by the caller
+ *
+ * On success the xarray entry owns one reference on @eb and one count in
+ * eb->writeback_inhibitors. A buffer already tracked by this handle is left
+ * untouched. Inhibiting is only an optimization, so an allocation failure in
+ * the xarray is not reported: the buffer simply stays untracked, may be
+ * written back, and may then need to be COWed again.
+ */
+void btrfs_inhibit_eb_writeback(struct btrfs_trans_handle *trans, struct extent_buffer *eb)
+{
+	struct xarray *inhibited = &trans->writeback_inhibited_ebs;
+	const unsigned long slot = eb->start >> trans->fs_info->nodesize_bits;
+	struct extent_buffer *prev;
+
+	lockdep_assert_held(&eb->lock);
+
+	/* Nothing to do when this handle already tracks the buffer. */
+	if (xa_load(inhibited, slot) == eb)
+		return;
+
+	/* This reference belongs to the xarray entry stored below. */
+	refcount_inc(&eb->refs);
+
+	prev = xa_store(inhibited, slot, eb, GFP_NOFS);
+	if (xa_is_err(prev)) {
+		/* Could not allocate: give the reference back and bail out. */
+		free_extent_buffer(eb);
+		return;
+	}
+
+	/*
+	 * A different buffer was tracked at this index, release the
+	 * inhibitor count and the reference its entry was holding.
+	 */
+	if (prev && prev != eb) {
+		atomic_dec(&prev->writeback_inhibitors);
+		free_extent_buffer(prev);
+	}
+
+	atomic_inc(&eb->writeback_inhibitors);
+}
+
+/*
+ * Drop every writeback inhibitor held by a transaction handle.
+ *
+ * @trans: transaction handle whose writeback_inhibited_ebs xarray is emptied
+ *
+ * Each tracked extent buffer has its writeback_inhibitors count decremented
+ * and the reference held for its xarray entry released, then the xarray
+ * itself is destroyed.
+ */
+void btrfs_uninhibit_all_eb_writeback(struct btrfs_trans_handle *trans)
+{
+	struct xarray *inhibited = &trans->writeback_inhibited_ebs;
+	struct extent_buffer *tracked;
+	unsigned long slot;
+
+	xa_for_each(inhibited, slot, tracked) {
+		atomic_dec(&tracked->writeback_inhibitors);
+		free_extent_buffer(tracked);
+	}
+
+	xa_destroy(inhibited);
+}
+
static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start)
{
eb->len = fs_info->nodesize;
eb->fs_info = fs_info;
init_rwsem(&eb->lock);
+ atomic_set(&eb->writeback_inhibitors, 0);
btrfs_leak_debug_add_eb(eb);
spinlock_t refs_lock;
refcount_t refs;
int read_mirror;
+ /* Inhibit WB_SYNC_NONE writeback when > 0. */
+ atomic_t writeback_inhibitors;
/* >= 0 if eb belongs to a log tree, -1 otherwise */
s8 log_index;
u8 folio_shift;
#define btrfs_extent_buffer_leak_debug_check(fs_info) do {} while (0)
#endif
+void btrfs_inhibit_eb_writeback(struct btrfs_trans_handle *trans,
+ struct extent_buffer *eb);
+void btrfs_uninhibit_all_eb_writeback(struct btrfs_trans_handle *trans);
+
#endif
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
+#include "extent_io.h"
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"
goto alloc_fail;
}
+ xa_init(&h->writeback_inhibited_ebs);
+
/*
* If we are JOIN_NOLOCK we're already committing a transaction and
* waiting on this guy, so we don't need to do the sb_start_intwrite
if (trans->type & __TRANS_FREEZABLE)
sb_end_intwrite(info->sb);
+ /*
+ * Uninhibit extent buffer writeback before decrementing num_writers,
+ * since the decrement wakes the committing thread which needs all
+ * buffers uninhibited to write them to disk.
+ */
+ btrfs_uninhibit_all_eb_writeback(trans);
+
WARN_ON(cur_trans != info->running_transaction);
WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
atomic_dec(&cur_trans->num_writers);
if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
btrfs_scrub_cancel(fs_info);
+ btrfs_uninhibit_all_eb_writeback(trans);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
}
fs_info->cleaner_kthread)
wake_up_process(fs_info->cleaner_kthread);
+ /*
+ * Uninhibit writeback on all extent buffers inhibited during this
+ * transaction before writing them to disk. Inhibiting prevented
+ * writeback while the transaction was building, but now we need
+ * them written.
+ */
+ btrfs_uninhibit_all_eb_writeback(trans);
+
ret = btrfs_write_and_wait_transaction(trans);
if (unlikely(ret)) {
btrfs_err(fs_info, "error while writing out transaction: %d", ret);
#include <linux/time64.h>
#include <linux/mutex.h>
#include <linux/wait.h>
+#include <linux/xarray.h>
#include "btrfs_inode.h"
#include "delayed-ref.h"
struct btrfs_fs_info *fs_info;
struct list_head new_bgs;
struct btrfs_block_rsv delayed_rsv;
+ /* Extent buffers with writeback inhibited by this handle. */
+ struct xarray writeback_inhibited_ebs;
};
/*