#include "verity.h"
#include "super.h"
#include "orphan.h"
+#include "backref.h"
struct btrfs_iget_args {
u64 ino;
u64 index;
};
+/*
+ * Used by data_reloc_print_warning_inode() to pass needed info for filename
+ * resolution and output of error message.
+ *
+ * Filled in by print_data_reloc_error() and handed to iterate_extent_inodes()
+ * as the opaque callback context.
+ */
+struct data_reloc_warn {
+ /* Scratch path used by the callback for its inode item and path lookups. */
+ struct btrfs_path path;
+ struct btrfs_fs_info *fs_info;
+ /* Copied from the extent item key offset; not read by the callback. */
+ u64 extent_item_size;
+ /* Logical address of the checksum failure, printed in every warning. */
+ u64 logical;
+ /* Mirror number passed in by the caller, printed in every warning. */
+ int mirror_num;
+};
+
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
u64 ram_bytes, int compress_type,
int type);
+/*
+ * Callback for iterate_extent_inodes(): warn about a checksum error for one
+ * inode that references the affected extent, once per resolvable file path.
+ *
+ * @inum: inode number to resolve
+ * @offset: offset reported for this inode, printed as-is
+ * @num_bytes: unused
+ * @root: objectid of the root that @inum lives in
+ * @warn_ctx: the struct data_reloc_warn prepared by the caller
+ *
+ * Return 0 if the paths were resolved and printed. Otherwise a warning without
+ * path is printed and the non-zero result of the failing step is returned.
+ */
+static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
+ u64 root, void *warn_ctx)
+{
+ struct data_reloc_warn *warn = warn_ctx;
+ struct btrfs_fs_info *fs_info = warn->fs_info;
+ struct extent_buffer *eb;
+ struct btrfs_inode_item *inode_item;
+ struct inode_fs_paths *ipath = NULL;
+ struct btrfs_root *local_root;
+ struct btrfs_key key;
+ unsigned int nofs_flag;
+ u32 nlink;
+ int ret;
+
+ local_root = btrfs_get_fs_root(fs_info, root, true);
+ if (IS_ERR(local_root)) {
+ ret = PTR_ERR(local_root);
+ goto err;
+ }
+
+ /* This makes the path point to (inum INODE_ITEM ioff). */
+ key.objectid = inum;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, local_root, &key, &warn->path, 0, 0);
+ if (ret) {
+ btrfs_put_root(local_root);
+ btrfs_release_path(&warn->path);
+ goto err;
+ }
+
+ eb = warn->path.nodes[0];
+ inode_item = btrfs_item_ptr(eb, warn->path.slots[0], struct btrfs_inode_item);
+ nlink = btrfs_inode_nlink(eb, inode_item);
+ btrfs_release_path(&warn->path);
+
+ nofs_flag = memalloc_nofs_save();
+ ipath = init_ipath(4096, local_root, &warn->path);
+ memalloc_nofs_restore(nofs_flag);
+ if (IS_ERR(ipath)) {
+ btrfs_put_root(local_root);
+ ret = PTR_ERR(ipath);
+ ipath = NULL;
+ /*
+ * -ENOMEM, not a critical error, just output a generic error
+ * without filename.
+ */
+ btrfs_warn(fs_info,
+"checksum error at logical %llu mirror %u root %llu, inode %llu offset %llu",
+ warn->logical, warn->mirror_num, root, inum, offset);
+ return ret;
+ }
+ ret = paths_from_inode(inum, ipath);
+ if (ret < 0) {
+ /*
+ * The err label does not drop the root reference, so it has to
+ * be released here or it would be leaked.
+ */
+ btrfs_put_root(local_root);
+ goto err;
+ }
+
+ /*
+ * We deliberately ignore the fact that ipath might have been too small
+ * to hold all of the paths here.
+ */
+ for (int i = 0; i < ipath->fspath->elem_cnt; i++) {
+ btrfs_warn(fs_info,
+"checksum error at logical %llu mirror %u root %llu inode %llu offset %llu length %u links %u (path: %s)",
+ warn->logical, warn->mirror_num, root, inum, offset,
+ fs_info->sectorsize, nlink,
+ (char *)(unsigned long)ipath->fspath->val[i]);
+ }
+
+ btrfs_put_root(local_root);
+ free_ipath(ipath);
+ return 0;
+
+err:
+ /* Every jump here has already released local_root (or never got it). */
+ btrfs_warn(fs_info,
+"checksum error at logical %llu mirror %u root %llu inode %llu offset %llu, path resolving failed with ret=%d",
+ warn->logical, warn->mirror_num, root, inum, offset, ret);
+
+ free_ipath(ipath);
+ return ret;
+}
+
+/*
+ * Do extra user-friendly error output (e.g. lookup all the affected files).
+ *
+ * Nothing is returned. The plain "csum failed" message is always printed
+ * first; the backref lookup only adds further warnings on top of it, and a
+ * failed extent item lookup is reported with its own message.
+ */
+static void print_data_reloc_error(const struct btrfs_inode *inode, u64 file_off,
+ const u8 *csum, const u8 *csum_expected,
+ int mirror_num)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_path path = { 0 };
+ struct btrfs_key found_key = { 0 };
+ struct extent_buffer *eb;
+ struct btrfs_extent_item *ei;
+ const u32 csum_size = fs_info->csum_size;
+ u64 logical;
+ u64 flags;
+ u32 item_size;
+ int ret;
+
+ /* The relocation block group bytenr is read under reloc_mutex. */
+ mutex_lock(&fs_info->reloc_mutex);
+ logical = btrfs_get_reloc_bg_bytenr(fs_info);
+ mutex_unlock(&fs_info->reloc_mutex);
+
+ /*
+ * U64_MAX is treated as "no relocation running": only the message
+ * without a logical address can be printed.
+ */
+ if (logical == U64_MAX) {
+ btrfs_warn_rl(fs_info, "has data reloc tree but no running relocation");
+ btrfs_warn_rl(fs_info,
+"csum failed root %lld ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+ inode->root->root_key.objectid, btrfs_ino(inode), file_off,
+ CSUM_FMT_VALUE(csum_size, csum),
+ CSUM_FMT_VALUE(csum_size, csum_expected),
+ mirror_num);
+ return;
+ }
+
+ /*
+ * NOTE(review): presumably file offsets in the data reloc inode are
+ * relative to the start of the block group being relocated - confirm.
+ */
+ logical += file_off;
+ btrfs_warn_rl(fs_info,
+"csum failed root %lld ino %llu off %llu logical %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+ inode->root->root_key.objectid,
+ btrfs_ino(inode), file_off, logical,
+ CSUM_FMT_VALUE(csum_size, csum),
+ CSUM_FMT_VALUE(csum_size, csum_expected),
+ mirror_num);
+
+ ret = extent_from_logical(fs_info, logical, &path, &found_key, &flags);
+ if (ret < 0) {
+ btrfs_err_rl(fs_info, "failed to lookup extent item for logical %llu: %d",
+ logical, ret);
+ return;
+ }
+ eb = path.nodes[0];
+ ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
+ item_size = btrfs_item_size(eb, path.slots[0]);
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ unsigned long ptr = 0;
+ u64 ref_root;
+ u8 ref_level;
+
+ /*
+ * Print one warning per tree backref. A positive return ends
+ * the walk silently, a negative one ends it with a warning.
+ */
+ while (true) {
+ ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
+ item_size, &ref_root,
+ &ref_level);
+ if (ret < 0) {
+ btrfs_warn_rl(fs_info,
+ "failed to resolve tree backref for logical %llu: %d",
+ logical, ret);
+ break;
+ }
+ if (ret > 0)
+ break;
+
+ btrfs_warn_rl(fs_info,
+"csum error at logical %llu mirror %u: metadata %s (level %d) in tree %llu",
+ logical, mirror_num,
+ (ref_level ? "node" : "leaf"),
+ ref_level, ref_root);
+ }
+ btrfs_release_path(&path);
+ } else {
+ struct btrfs_backref_walk_ctx ctx = { 0 };
+ struct data_reloc_warn reloc_warn = { 0 };
+
+ /* The extent item is no longer needed once found_key is known. */
+ btrfs_release_path(&path);
+
+ ctx.bytenr = found_key.objectid;
+ ctx.extent_item_pos = logical - found_key.objectid;
+ ctx.fs_info = fs_info;
+
+ reloc_warn.logical = logical;
+ reloc_warn.extent_item_size = found_key.offset;
+ reloc_warn.mirror_num = mirror_num;
+ reloc_warn.fs_info = fs_info;
+
+ /* The per-inode callback prints the warnings; the result is ignored. */
+ iterate_extent_inodes(&ctx, true,
+ data_reloc_print_warning_inode, &reloc_warn);
+ }
+}
+
static void __cold btrfs_print_data_csum_error(struct btrfs_inode *inode,
u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
{
struct btrfs_root *root = inode->root;
const u32 csum_size = root->fs_info->csum_size;
+ /* For data reloc tree, it's better to do a backref lookup instead. */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ return print_data_reloc_error(inode, logical_start, csum,
+ csum_expected, mirror_num);
+
/* Output without objectid, which is more meaningful */
if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) {
btrfs_warn_rl(root->fs_info,
{
struct btrfs_inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
u64 blocksize = fs_info->sectorsize;
u64 start = async_chunk->start;
u64 end = async_chunk->end;
/* Compression level is applied here and only here */
ret = btrfs_compress_pages(
compress_type | (fs_info->compress_level << 4),
- inode->vfs_inode.i_mapping, start,
+ mapping, start,
pages,
&nr_pages,
&total_in,
unsigned long clear_flags = EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING;
- unsigned long page_error_op;
- page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
+ if (ret < 0)
+ mapping_set_error(mapping, -EIO);
/*
* inline extent creation worked or returned error,
clear_flags,
PAGE_UNLOCK |
PAGE_START_WRITEBACK |
- page_error_op |
PAGE_END_WRITEBACK);
/*
unsigned long nr_written = 0;
int page_started = 0;
int ret;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .range_start = start,
+ .range_end = end,
+ .no_cgroup_owner = 1,
+ };
/*
* Call cow_file_range() to run the delalloc range directly, since we
const u64 page_start = page_offset(locked_page);
const u64 page_end = page_start + PAGE_SIZE - 1;
- btrfs_page_set_error(inode->root->fs_info, locked_page,
- page_start, PAGE_SIZE);
set_page_writeback(locked_page);
end_page_writeback(locked_page);
end_extent_writepage(locked_page, ret, page_start, page_end);
}
/* All pages will be unlocked, including @locked_page */
- return extent_write_locked_range(&inode->vfs_inode, start, end);
+ wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
+ ret = extent_write_locked_range(&inode->vfs_inode, start, end, &wbc);
+ wbc_detach_inode(&wbc);
+ return ret;
}
static int submit_one_async_extent(struct btrfs_inode *inode,
struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_ordered_extent *ordered;
struct btrfs_key ins;
struct page *locked_page = NULL;
struct extent_map *em;
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, /* file_offset */
+ ordered = btrfs_alloc_ordered_extent(inode, start, /* file_offset */
async_extent->ram_size, /* num_bytes */
async_extent->ram_size, /* ram_bytes */
ins.objectid, /* disk_bytenr */
0, /* offset */
1 << BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
- if (ret) {
+ if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
+ ret = PTR_ERR(ordered);
goto out_free_reserve;
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_START_WRITEBACK);
-
- btrfs_submit_compressed_write(inode, start, /* file_offset */
- async_extent->ram_size, /* num_bytes */
- ins.objectid, /* disk_bytenr */
- ins.offset, /* compressed_len */
+ btrfs_submit_compressed_write(ordered,
async_extent->pages, /* compressed_pages */
async_extent->nr_pages,
async_chunk->write_flags, true);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_free:
+ mapping_set_error(inode->vfs_inode.i_mapping, -EIO);
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
PAGE_UNLOCK | PAGE_START_WRITEBACK |
- PAGE_END_WRITEBACK | PAGE_SET_ERROR);
+ PAGE_END_WRITEBACK);
free_async_extent_pages(async_extent);
goto done;
}
min_alloc_size = fs_info->sectorsize;
while (num_bytes > 0) {
+ struct btrfs_ordered_extent *ordered;
+
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
min_alloc_size, 0, alloc_hint,
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, ram_size, ram_size,
- ins.objectid, cur_alloc_size, 0,
- 1 << BTRFS_ORDERED_REGULAR,
- BTRFS_COMPRESS_NONE);
- if (ret)
+ ordered = btrfs_alloc_ordered_extent(inode, start, ram_size,
+ ram_size, ins.objectid, cur_alloc_size,
+ 0, 1 << BTRFS_ORDERED_REGULAR,
+ BTRFS_COMPRESS_NONE);
+ if (IS_ERR(ordered)) {
+ ret = PTR_ERR(ordered);
goto out_drop_extent_cache;
+ }
if (btrfs_is_data_reloc_root(root)) {
- ret = btrfs_reloc_clone_csums(inode, start,
- cur_alloc_size);
+ ret = btrfs_reloc_clone_csums(ordered);
+
/*
* Only drop cache here, and process as normal.
*
start + ram_size - 1,
false);
}
+ btrfs_put_ordered_extent(ordered);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
* ->inode could be NULL if async_chunk_start has failed to compress,
* in which case we don't have anything to submit, yet we need to
* always adjust ->async_delalloc_pages as its paired with the init
- * happening in cow_file_range_async
+ * happening in run_delalloc_compressed
*/
if (async_chunk->inode)
submit_compressed_extents(async_chunk);
kvfree(async_cow);
}
-static int cow_file_range_async(struct btrfs_inode *inode,
- struct writeback_control *wbc,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written)
+static bool run_delalloc_compressed(struct btrfs_inode *inode,
+ struct writeback_control *wbc,
+ struct page *locked_page,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
struct async_cow *ctx;
struct async_chunk *async_chunk;
unsigned long nr_pages;
- u64 cur_end;
u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
int i;
- bool should_compress;
unsigned nofs_flag;
const blk_opf_t write_flags = wbc_to_write_flags(wbc);
- unlock_extent(&inode->io_tree, start, end, NULL);
-
- if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
- !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
- num_chunks = 1;
- should_compress = false;
- } else {
- should_compress = true;
- }
-
nofs_flag = memalloc_nofs_save();
ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
+ if (!ctx)
+ return false;
- if (!ctx) {
- unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
- EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
- EXTENT_DO_ACCOUNTING;
- unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK |
- PAGE_END_WRITEBACK | PAGE_SET_ERROR;
-
- extent_clear_unlock_delalloc(inode, start, end, locked_page,
- clear_bits, page_ops);
- return -ENOMEM;
- }
+ unlock_extent(&inode->io_tree, start, end, NULL);
+ set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
async_chunk = ctx->chunks;
atomic_set(&ctx->num_chunks, num_chunks);
for (i = 0; i < num_chunks; i++) {
- if (should_compress)
- cur_end = min(end, start + SZ_512K - 1);
- else
- cur_end = end;
+ u64 cur_end = min(end, start + SZ_512K - 1);
/*
* igrab is called higher up in the call chain, take only the
start = cur_end + 1;
}
*page_started = 1;
- return 0;
+ return true;
}
static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
struct page *locked_page, u64 start,
u64 end, int *page_started,
- unsigned long *nr_written)
+ unsigned long *nr_written,
+ struct writeback_control *wbc)
{
u64 done_offset = end;
int ret;
account_page_redirty(locked_page);
}
locked_page_done = true;
- extent_write_locked_range(&inode->vfs_inode, start, done_offset);
-
+ extent_write_locked_range(&inode->vfs_inode, start, done_offset,
+ wbc);
start = done_offset + 1;
}
ret = btrfs_cross_ref_exist(root, btrfs_ino(inode),
key->offset - args->extent_offset,
- args->disk_bytenr, false, path);
+ args->disk_bytenr, args->strict, path);
WARN_ON_ONCE(ret > 0 && is_freespace_inode);
if (ret != 0)
goto out;
nocow_args.writeback_path = true;
while (1) {
+ struct btrfs_ordered_extent *ordered;
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
u64 ram_bytes;
u64 nocow_end;
int extent_type;
+ bool is_prealloc;
nocow = false;
}
nocow_end = cur_offset + nocow_args.num_bytes - 1;
-
- if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ is_prealloc = extent_type == BTRFS_FILE_EXTENT_PREALLOC;
+ if (is_prealloc) {
u64 orig_start = found_key.offset - nocow_args.extent_offset;
struct extent_map *em;
goto error;
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode,
- cur_offset, nocow_args.num_bytes,
- nocow_args.num_bytes,
- nocow_args.disk_bytenr,
- nocow_args.num_bytes, 0,
- 1 << BTRFS_ORDERED_PREALLOC,
- BTRFS_COMPRESS_NONE);
- if (ret) {
+ }
+
+ ordered = btrfs_alloc_ordered_extent(inode, cur_offset,
+ nocow_args.num_bytes, nocow_args.num_bytes,
+ nocow_args.disk_bytenr, nocow_args.num_bytes, 0,
+ is_prealloc
+ ? (1 << BTRFS_ORDERED_PREALLOC)
+ : (1 << BTRFS_ORDERED_NOCOW),
+ BTRFS_COMPRESS_NONE);
+ if (IS_ERR(ordered)) {
+ if (is_prealloc) {
btrfs_drop_extent_map_range(inode, cur_offset,
nocow_end, false);
- goto error;
}
- } else {
- ret = btrfs_add_ordered_extent(inode, cur_offset,
- nocow_args.num_bytes,
- nocow_args.num_bytes,
- nocow_args.disk_bytenr,
- nocow_args.num_bytes,
- 0,
- 1 << BTRFS_ORDERED_NOCOW,
- BTRFS_COMPRESS_NONE);
- if (ret)
- goto error;
+ ret = PTR_ERR(ordered);
+ goto error;
}
if (nocow) {
* extent_clear_unlock_delalloc() in error handler
* from freeing metadata of created ordered extent.
*/
- ret = btrfs_reloc_clone_csums(inode, cur_offset,
- nocow_args.num_bytes);
+ ret = btrfs_reloc_clone_csums(ordered);
+ btrfs_put_ordered_extent(ordered);
extent_clear_unlock_delalloc(inode, cur_offset, nocow_end,
locked_page, EXTENT_LOCKED |
u64 start, u64 end, int *page_started, unsigned long *nr_written,
struct writeback_control *wbc)
{
- int ret;
+ int ret = 0;
const bool zoned = btrfs_is_zoned(inode->root->fs_info);
/*
ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, nr_written);
- } else if (!btrfs_inode_can_compress(inode) ||
- !inode_need_compress(inode, start, end)) {
- if (zoned)
- ret = run_delalloc_zoned(inode, locked_page, start, end,
- page_started, nr_written);
- else
- ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1, NULL);
- } else {
- set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
- ret = cow_file_range_async(inode, wbc, locked_page, start, end,
- page_started, nr_written);
+ goto out;
}
+
+ if (btrfs_inode_can_compress(inode) &&
+ inode_need_compress(inode, start, end) &&
+ run_delalloc_compressed(inode, wbc, locked_page, start,
+ end, page_started, nr_written))
+ goto out;
+
+ if (zoned)
+ ret = run_delalloc_zoned(inode, locked_page, start, end,
+ page_started, nr_written, wbc);
+ else
+ ret = cow_file_range(inode, locked_page, start, end,
+ page_started, nr_written, 1, NULL);
+
+out:
ASSERT(ret <= 0);
if (ret)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
}
}
-/*
- * Split off the first pre bytes from the extent_map at [start, start + len]
- *
- * This function is intended to be used only for extract_ordered_extent().
- */
-static int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre)
-{
- struct extent_map_tree *em_tree = &inode->extent_tree;
- struct extent_map *em;
- struct extent_map *split_pre = NULL;
- struct extent_map *split_mid = NULL;
- int ret = 0;
- unsigned long flags;
-
- ASSERT(pre != 0);
- ASSERT(pre < len);
-
- split_pre = alloc_extent_map();
- if (!split_pre)
- return -ENOMEM;
- split_mid = alloc_extent_map();
- if (!split_mid) {
- ret = -ENOMEM;
- goto out_free_pre;
- }
-
- lock_extent(&inode->io_tree, start, start + len - 1, NULL);
- write_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
- if (!em) {
- ret = -EIO;
- goto out_unlock;
- }
-
- ASSERT(em->len == len);
- ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
- ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
- ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
- ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
- ASSERT(!list_empty(&em->list));
-
- flags = em->flags;
- clear_bit(EXTENT_FLAG_PINNED, &em->flags);
-
- /* First, replace the em with a new extent_map starting from * em->start */
- split_pre->start = em->start;
- split_pre->len = pre;
- split_pre->orig_start = split_pre->start;
- split_pre->block_start = em->block_start;
- split_pre->block_len = split_pre->len;
- split_pre->orig_block_len = split_pre->block_len;
- split_pre->ram_bytes = split_pre->len;
- split_pre->flags = flags;
- split_pre->compress_type = em->compress_type;
- split_pre->generation = em->generation;
-
- replace_extent_mapping(em_tree, em, split_pre, 1);
-
- /*
- * Now we only have an extent_map at:
- * [em->start, em->start + pre]
- */
-
- /* Insert the middle extent_map. */
- split_mid->start = em->start + pre;
- split_mid->len = em->len - pre;
- split_mid->orig_start = split_mid->start;
- split_mid->block_start = em->block_start + pre;
- split_mid->block_len = split_mid->len;
- split_mid->orig_block_len = split_mid->block_len;
- split_mid->ram_bytes = split_mid->len;
- split_mid->flags = flags;
- split_mid->compress_type = em->compress_type;
- split_mid->generation = em->generation;
- add_extent_mapping(em_tree, split_mid, 1);
-
- /* Once for us */
- free_extent_map(em);
- /* Once for the tree */
- free_extent_map(em);
-
-out_unlock:
- write_unlock(&em_tree->lock);
- unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
- free_extent_map(split_mid);
-out_free_pre:
- free_extent_map(split_pre);
- return ret;
-}
-
+/*
+ * Attach to @bbio the ordered extent that matches the range of its bio.
+ *
+ * If @ordered already has the size of the bio, an extra reference is taken
+ * and @ordered itself is attached. Otherwise the extent map is split (except
+ * for NOCOW ordered extents) and the ordered extent returned by
+ * btrfs_split_ordered_extent() is attached instead.
+ *
+ * Return 0 on success, -EINVAL if the bio does not start at the beginning of
+ * @ordered, or the error of the failing split helper.
+ */
-int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
- struct btrfs_ordered_extent *ordered)
+static int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
+ struct btrfs_ordered_extent *ordered)
{
u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
u64 len = bbio->bio.bi_iter.bi_size;
- struct btrfs_inode *inode = bbio->inode;
- u64 ordered_len = ordered->num_bytes;
- int ret = 0;
+ struct btrfs_ordered_extent *new;
+ int ret;
/* Must always be called for the beginning of an ordered extent. */
if (WARN_ON_ONCE(start != ordered->disk_bytenr))
return -EINVAL;
/* No need to split if the ordered extent covers the entire bio. */
- if (ordered->disk_num_bytes == len)
+ if (ordered->disk_num_bytes == len) {
+ /* The bio gets its own reference on the ordered extent. */
+ refcount_inc(&ordered->refs);
+ bbio->ordered = ordered;
return 0;
-
- ret = btrfs_split_ordered_extent(ordered, len);
- if (ret)
- return ret;
+ }
/*
* Don't split the extent_map for NOCOW extents, as we're writing into
* a pre-existing one.
*/
- if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
- return 0;
+ if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+ ret = split_extent_map(bbio->inode, bbio->file_offset,
+ ordered->num_bytes, len,
+ ordered->disk_bytenr);
+ if (ret)
+ return ret;
+ }
- return split_extent_map(inode, bbio->file_offset, ordered_len, len);
+ /* The ordered extent is split only after the extent map split succeeded. */
+ new = btrfs_split_ordered_extent(ordered, len);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+ bbio->ordered = new;
+ return 0;
}
/*
trans->adding_csums = true;
if (!csum_root)
csum_root = btrfs_csum_root(trans->fs_info,
- sum->bytenr);
+ sum->logical);
ret = btrfs_csum_file_blocks(trans, csum_root, sum);
trans->adding_csums = false;
if (ret)
ret = set_extent_bit(&inode->io_tree, search_start,
search_start + em_len - 1,
- EXTENT_DELALLOC_NEW, cached_state,
- GFP_NOFS);
+ EXTENT_DELALLOC_NEW, cached_state);
next:
search_start = extent_map_end(em);
free_extent_map(em);
return ret;
}
- return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
- cached_state);
+ return set_extent_bit(&inode->io_tree, start, end,
+ EXTENT_DELALLOC | extra_bits, cached_state);
}
/* see btrfs_writepage_start_hook for details on why this is required */
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
clear_page_dirty_for_io(page);
- SetPageError(page);
}
btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE);
unlock_page(page);
* an ordered extent if the range of bytes in the file it covers are
* fully written.
*/
-int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
+int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
{
struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
struct btrfs_root *root = inode->root;
goto out;
}
- /* A valid ->physical implies a write on a sequential zone. */
- if (ordered_extent->physical != (u64)-1) {
- btrfs_rewrite_logical_zoned(ordered_extent);
- btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
- ordered_extent->disk_num_bytes);
- } else if (btrfs_is_data_reloc_root(inode->root)) {
+ if (btrfs_is_zoned(fs_info))
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
- }
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
return ret;
}
+/*
+ * Finish an ordered extent. On a zoned filesystem btrfs_finish_ordered_zoned()
+ * is run first, unless the ordered extent is flagged BTRFS_ORDERED_IOERR. The
+ * result of btrfs_finish_one_ordered() is returned in all cases.
+ */
+int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(ordered->inode->i_sb);
+
+ if (btrfs_is_zoned(fs_info)) {
+ /* Skipped for ordered extents that hit an IO error. */
+ if (!test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) {
+ btrfs_finish_ordered_zoned(ordered);
+ }
+ }
+
+ return btrfs_finish_one_ordered(ordered);
+}
+
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
u64 end, bool uptodate)
+/*
+ * Drop a reference on @inode without doing the final iput() here.
+ *
+ * If i_count is not 1 it is simply decremented. Otherwise the inode is queued
+ * on fs_info->delayed_iputs so the final iput() happens when the list is run,
+ * and the cleaner kthread is woken unless BTRFS_FS_CLEANER_RUNNING is set.
+ */
void btrfs_add_delayed_iput(struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ unsigned long flags;
if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1))
return;
atomic_inc(&fs_info->nr_delayed_iputs);
- spin_lock(&fs_info->delayed_iput_lock);
+ /*
+ * Need to be irq safe here because we can be called from either an irq
+ * context (see bio.c and btrfs_put_ordered_extent()) or a non-irq
+ * context.
+ */
+ spin_lock_irqsave(&fs_info->delayed_iput_lock, flags);
ASSERT(list_empty(&inode->delayed_iput));
list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs);
- spin_unlock(&fs_info->delayed_iput_lock);
+ spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags);
if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
wake_up_process(fs_info->cleaner_kthread);
}
struct btrfs_inode *inode)
{
list_del_init(&inode->delayed_iput);
- spin_unlock(&fs_info->delayed_iput_lock);
+ spin_unlock_irq(&fs_info->delayed_iput_lock);
iput(&inode->vfs_inode);
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
wake_up(&fs_info->delayed_iputs_wait);
- spin_lock(&fs_info->delayed_iput_lock);
+ spin_lock_irq(&fs_info->delayed_iput_lock);
}
+/*
+ * Run the delayed iput of @inode now if it is queued. The list_empty() check
+ * is done once without the lock and repeated under delayed_iput_lock before
+ * the inode is taken off the list.
+ */
static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
struct btrfs_inode *inode)
{
if (!list_empty(&inode->delayed_iput)) {
- spin_lock(&fs_info->delayed_iput_lock);
+ spin_lock_irq(&fs_info->delayed_iput_lock);
if (!list_empty(&inode->delayed_iput))
run_delayed_iput_locked(fs_info, inode);
- spin_unlock(&fs_info->delayed_iput_lock);
+ spin_unlock_irq(&fs_info->delayed_iput_lock);
}
}
+/*
+ * Run every delayed iput queued on fs_info->delayed_iputs, first entry first,
+ * until the list is empty. The lock is dropped and retaken between entries
+ * when a reschedule is needed.
+ */
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
{
-
- spin_lock(&fs_info->delayed_iput_lock);
+ /*
+ * btrfs_put_ordered_extent() can run in irq context (see bio.c), which
+ * calls btrfs_add_delayed_iput() and that needs to lock
+ * fs_info->delayed_iput_lock. So we need to disable irqs here to
+ * prevent a deadlock.
+ */
+ spin_lock_irq(&fs_info->delayed_iput_lock);
while (!list_empty(&fs_info->delayed_iputs)) {
struct btrfs_inode *inode;
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
run_delayed_iput_locked(fs_info, inode);
- cond_resched_lock(&fs_info->delayed_iput_lock);
+ if (need_resched()) {
+ spin_unlock_irq(&fs_info->delayed_iput_lock);
+ cond_resched();
+ spin_lock_irq(&fs_info->delayed_iput_lock);
+ }
}
- spin_unlock(&fs_info->delayed_iput_lock);
+ spin_unlock_irq(&fs_info->delayed_iput_lock);
}
/*
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
inode = btrfs_iget(fs_info->sb, last_objectid, root);
- ret = PTR_ERR_OR_ZERO(inode);
- if (ret && ret != -ENOENT)
- goto out;
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
+ if (ret != -ENOENT)
+ goto out;
+ }
- if (ret == -ENOENT && root == fs_info->tree_root) {
+ if (!inode && root == fs_info->tree_root) {
struct btrfs_root *dead_root;
int is_dead_root = 0;
* deleted but wasn't. The inode number may have been reused,
* but either way, we can delete the orphan item.
*/
- if (ret == -ENOENT || inode->i_nlink) {
- if (!ret) {
+ if (!inode || inode->i_nlink) {
+ if (inode) {
ret = btrfs_drop_verity_items(BTRFS_I(inode));
iput(inode);
+ inode = NULL;
if (ret)
goto out;
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- iput(inode);
goto out;
}
btrfs_debug(fs_info, "auto deleting %Lu",
ret = btrfs_del_orphan_item(trans, root,
found_key.objectid);
btrfs_end_transaction(trans);
- if (ret) {
- iput(inode);
+ if (ret)
goto out;
- }
continue;
}
}
btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
- 0);
+ false);
ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
&fname.disk_name);
if (only_release_metadata)
set_extent_bit(&inode->io_tree, block_start, block_end,
- EXTENT_NORESERVE, NULL, GFP_NOFS);
+ EXTENT_NORESERVE, NULL);
out_unlock:
if (ret) {
static int btrfs_get_blocks_direct_write(struct extent_map **map,
struct inode *inode,
struct btrfs_dio_data *dio_data,
- u64 start, u64 len,
+ u64 start, u64 *lenp,
unsigned int iomap_flags)
{
const bool nowait = (iomap_flags & IOMAP_NOWAIT);
struct btrfs_block_group *bg;
bool can_nocow = false;
bool space_reserved = false;
+ u64 len = *lenp;
u64 prev_len;
int ret = 0;
free_extent_map(em);
*map = NULL;
- if (nowait)
- return -EAGAIN;
+ if (nowait) {
+ ret = -EAGAIN;
+ goto out;
+ }
/*
* If we could not allocate data space before locking the file
* range and we can't do a NOCOW write, then we have to fail.
*/
- if (!dio_data->data_space_reserved)
- return -ENOSPC;
+ if (!dio_data->data_space_reserved) {
+ ret = -ENOSPC;
+ goto out;
+ }
/*
* We have to COW and we have already reserved data space before,
btrfs_delalloc_release_extents(BTRFS_I(inode), len);
btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true);
}
+ *lenp = len;
return ret;
}
if (write) {
ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
- start, len, flags);
+ start, &len, flags);
if (ret < 0)
goto unlock_err;
unlock_extents = true;
pos += submitted;
length -= submitted;
if (write)
- btrfs_mark_ordered_io_finished(BTRFS_I(inode), NULL,
- pos, length, false);
+ btrfs_finish_ordered_extent(dio_data->ordered, NULL,
+ pos, length, false);
else
unlock_extent(&BTRFS_I(inode)->io_tree, pos,
pos + length - 1, NULL);
dip->file_offset, dip->bytes, bio->bi_status);
}
- if (btrfs_op(bio) == BTRFS_MAP_WRITE)
- btrfs_mark_ordered_io_finished(inode, NULL, dip->file_offset,
- dip->bytes, !bio->bi_status);
- else
+ if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
+ btrfs_finish_ordered_extent(bbio->ordered, NULL,
+ dip->file_offset, dip->bytes,
+ !bio->bi_status);
+ } else {
unlock_extent(&inode->io_tree, dip->file_offset,
dip->file_offset + dip->bytes - 1, NULL);
+ }
bbio->bio.bi_private = bbio->private;
iomap_dio_bio_end_io(bio);
ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
if (ret) {
- btrfs_bio_end_io(bbio, errno_to_blk_status(ret));
+ bbio->bio.bi_status = errno_to_blk_status(ret);
+ btrfs_dio_end_io(bbio);
return;
}
}
int ret;
struct btrfs_trans_handle *trans;
u64 mask = fs_info->sectorsize - 1;
- u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
+ const u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
if (!skip_writeback) {
ret = btrfs_wait_ordered_range(&inode->vfs_inode,
/* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
min_size, false);
- BUG_ON(ret);
+ /*
+ * We have reserved 2 metadata units when we started the transaction and
+ * min_size matches 1 unit, so this should never fail, but if it does,
+ * it's not critical we just fail truncation.
+ */
+ if (WARN_ON(ret)) {
+ btrfs_end_transaction(trans);
+ goto out;
+ }
trans->block_rsv = rsv;
btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
rsv, min_size, false);
- BUG_ON(ret); /* shouldn't happen */
+ /*
+ * We have reserved 2 metadata units when we started the
+ * transaction and min_size matches 1 unit, so this should never
+ * fail, but if it does, it's not critical we just fail truncation.
+ */
+ if (WARN_ON(ret))
+ break;
+
trans->block_rsv = rsv;
}
ei->io_tree.inode = ei;
extent_io_tree_init(fs_info, &ei->file_extent_tree,
IO_TREE_INODE_FILE_EXTENT);
- atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
inode_bytes = inode_get_bytes(inode);
spin_unlock(&BTRFS_I(inode)->lock);
stat->blocks = (ALIGN(inode_bytes, blocksize) +
- ALIGN(delalloc_bytes, blocksize)) >> 9;
+ ALIGN(delalloc_bytes, blocksize)) >> SECTOR_SHIFT;
return 0;
}
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
- BTRFS_I(old_inode), 1);
+ BTRFS_I(old_inode), true);
btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
- BTRFS_I(new_inode), 1);
+ BTRFS_I(new_inode), true);
}
/* src is a subvolume */
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
- BTRFS_I(old_inode), 1);
+ BTRFS_I(old_inode), true);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
struct extent_io_tree *io_tree = &inode->io_tree;
struct extent_changeset *data_reserved = NULL;
struct extent_state *cached_state = NULL;
+ struct btrfs_ordered_extent *ordered;
int compression;
size_t orig_count;
u64 start, end;
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, num_bytes, ram_bytes,
+ ordered = btrfs_alloc_ordered_extent(inode, start, num_bytes, ram_bytes,
ins.objectid, ins.offset,
encoded->unencoded_offset,
(1 << BTRFS_ORDERED_ENCODED) |
(1 << BTRFS_ORDERED_COMPRESSED),
compression);
- if (ret) {
+ if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
+ ret = PTR_ERR(ordered);
goto out_free_reserved;
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_delalloc_release_extents(inode, num_bytes);
- btrfs_submit_compressed_write(inode, start, num_bytes, ins.objectid,
- ins.offset, pages, nr_pages, 0, false);
+ btrfs_submit_compressed_write(ordered, pages, nr_pages, 0, false);
ret = orig_count;
goto out;
.read_folio = btrfs_read_folio,
.writepages = btrfs_writepages,
.readahead = btrfs_readahead,
- .direct_IO = noop_direct_IO,
.invalidate_folio = btrfs_invalidate_folio,
.release_folio = btrfs_release_folio,
.migrate_folio = btrfs_migrate_folio,