/* helper to cleanup workers */
static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
{
- btrfs_destroy_workqueue(fs_info->fixup_workers);
btrfs_destroy_workqueue(fs_info->delalloc_workers);
btrfs_destroy_workqueue(fs_info->workers);
if (fs_info->endio_workers)
fs_info->caching_workers =
btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
- fs_info->fixup_workers =
- btrfs_alloc_ordered_workqueue(fs_info, "fixup", ordered_flags);
-
fs_info->endio_workers =
alloc_workqueue("btrfs-endio", flags, max_active);
fs_info->endio_meta_workers =
fs_info->endio_workers && fs_info->endio_meta_workers &&
fs_info->endio_write_workers &&
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
- fs_info->caching_workers && fs_info->fixup_workers &&
+ fs_info->caching_workers &&
fs_info->delayed_workers && fs_info->qgroup_rescan_workers &&
fs_info->discard_ctl.discard_workers)) {
return -ENOMEM;
if (unlikely(BTRFS_FS_ERROR(fs_info)))
btrfs_error_commit_super(fs_info);
- /*
- * Wait for any fixup workers to complete.
- * If we don't wait for them here and they are still running by the time
- * we call kthread_stop() against the cleaner kthread further below, we
- * get an use-after-free on the cleaner because the fixup worker adds an
- * inode to the list of delayed iputs and then attempts to wakeup the
- * cleaner kthread, which was already stopped and destroyed. We parked
- * already the cleaner, but below we run all pending delayed iputs.
- */
- btrfs_flush_workqueue(fs_info->fixup_workers);
/*
* Similar case here, we have to wait for delalloc workers before we
* proceed below and stop the cleaner kthread, otherwise we trigger a
ASSERT(end <= folio_end, "start=%llu len=%u folio_start=%llu folio_size=%zu",
start, len, folio_start, folio_size(folio));
- ret = btrfs_writepage_cow_fixup(folio);
- if (ret == -EAGAIN) {
- /* Fixup worker will requeue */
- folio_redirty_for_writepage(bio_ctrl->wbc, folio);
- folio_unlock(folio);
- return 1;
- }
- if (ret < 0) {
+ if (unlikely(!folio_test_ordered(folio))) {
+ DEBUG_WARN();
+ btrfs_err_rl(fs_info,
+ "root %lld ino %llu folio %llu is marked dirty without notifying the fs",
+ btrfs_root_id(inode->root),
+ btrfs_ino(inode),
+ folio_pos(folio));
btrfs_folio_clear_dirty(fs_info, folio, start, len);
btrfs_folio_set_writeback(fs_info, folio, start, len);
btrfs_folio_clear_writeback(fs_info, folio, start, len);
- return ret;
+ return -EUCLEAN;
}
bitmap_set(&range_bitmap, (start - folio_pos(folio)) >> fs_info->sectorsize_bits,
*
* So here we check if the page has private set to rule out such
* case.
- * But we also have a long history of relying on the COW fixup,
- * so here we only enable this check for experimental builds until
- * we're sure it's safe.
*/
- if (IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL) &&
- unlikely(!folio_test_private(folio))) {
+ if (unlikely(!folio_test_private(folio))) {
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
btrfs_err_rl(fs_info,
"root %lld ino %llu folio %llu is marked dirty without notifying the fs",
EXTENT_DELALLOC | extra_bits, cached_state);
}
-/* see btrfs_writepage_start_hook for details on why this is required */
-struct btrfs_writepage_fixup {
- struct folio *folio;
- struct btrfs_inode *inode;
- struct btrfs_work work;
-};
-
-static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
-{
- struct btrfs_writepage_fixup *fixup =
- container_of(work, struct btrfs_writepage_fixup, work);
- struct btrfs_ordered_extent *ordered;
- struct extent_state *cached_state = NULL;
- struct extent_changeset *data_reserved = NULL;
- struct folio *folio = fixup->folio;
- struct btrfs_inode *inode = fixup->inode;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u64 page_start = folio_pos(folio);
- u64 page_end = folio_next_pos(folio) - 1;
- int ret = 0;
- bool free_delalloc_space = true;
-
- /*
- * This is similar to page_mkwrite, we need to reserve the space before
- * we take the folio lock.
- */
- ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
- folio_size(folio));
-again:
- folio_lock(folio);
-
- /*
- * Before we queued this fixup, we took a reference on the folio.
- * folio->mapping may go NULL, but it shouldn't be moved to a different
- * address space.
- */
- if (!folio->mapping || !folio_test_dirty(folio) ||
- !folio_test_checked(folio)) {
- /*
- * Unfortunately this is a little tricky, either
- *
- * 1) We got here and our folio had already been dealt with and
- * we reserved our space, thus ret == 0, so we need to just
- * drop our space reservation and bail. This can happen the
- * first time we come into the fixup worker, or could happen
- * while waiting for the ordered extent.
- * 2) Our folio was already dealt with, but we happened to get an
- * ENOSPC above from the btrfs_delalloc_reserve_space. In
- * this case we obviously don't have anything to release, but
- * because the folio was already dealt with we don't want to
- * mark the folio with an error, so make sure we're resetting
- * ret to 0. This is why we have this check _before_ the ret
- * check, because we do not want to have a surprise ENOSPC
- * when the folio was already properly dealt with.
- */
- if (!ret) {
- btrfs_delalloc_release_extents(inode, folio_size(folio));
- btrfs_delalloc_release_space(inode, data_reserved,
- page_start, folio_size(folio),
- true);
- }
- ret = 0;
- goto out_page;
- }
-
- /*
- * We can't mess with the folio state unless it is locked, so now that
- * it is locked bail if we failed to make our space reservation.
- */
- if (ret)
- goto out_page;
-
- btrfs_lock_extent(&inode->io_tree, page_start, page_end, &cached_state);
-
- /* already ordered? We're done */
- if (folio_test_ordered(folio))
- goto out_reserved;
-
- ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
- if (ordered) {
- btrfs_unlock_extent(&inode->io_tree, page_start, page_end,
- &cached_state);
- folio_unlock(folio);
- btrfs_start_ordered_extent(ordered);
- btrfs_put_ordered_extent(ordered);
- goto again;
- }
-
- ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
- &cached_state);
- if (ret)
- goto out_reserved;
-
- /*
- * Everything went as planned, we're now the owner of a dirty page with
- * delayed allocation bits set and space reserved for our COW
- * destination.
- *
- * The page was dirty when we started, nothing should have cleaned it.
- */
- BUG_ON(!folio_test_dirty(folio));
- free_delalloc_space = false;
-out_reserved:
- btrfs_delalloc_release_extents(inode, PAGE_SIZE);
- if (free_delalloc_space)
- btrfs_delalloc_release_space(inode, data_reserved, page_start,
- PAGE_SIZE, true);
- btrfs_unlock_extent(&inode->io_tree, page_start, page_end, &cached_state);
-out_page:
- if (ret) {
- /*
- * We hit ENOSPC or other errors. Update the mapping and page
- * to reflect the errors and clean the page.
- */
- mapping_set_error(folio->mapping, ret);
- btrfs_folio_clear_ordered(fs_info, folio, page_start,
- folio_size(folio));
- btrfs_mark_ordered_io_finished(inode, page_start,
- folio_size(folio), !ret);
- folio_clear_dirty_for_io(folio);
- }
- btrfs_folio_clear_checked(fs_info, folio, page_start, PAGE_SIZE);
- folio_unlock(folio);
- folio_put(folio);
- kfree(fixup);
- extent_changeset_free(data_reserved);
- /*
- * As a precaution, do a delayed iput in case it would be the last iput
- * that could need flushing space. Recursing back to fixup worker would
- * deadlock.
- */
- btrfs_add_delayed_iput(inode);
-}
-
-/*
- * There are a few paths in the higher layers of the kernel that directly
- * set the folio dirty bit without asking the filesystem if it is a
- * good idea. This causes problems because we want to make sure COW
- * properly happens and the data=ordered rules are followed.
- *
- * In our case any range that doesn't have the ORDERED bit set
- * hasn't been properly setup for IO. We kick off an async process
- * to fix it up. The async helper will wait for ordered extents, set
- * the delalloc bit and make it safe to write the folio.
- */
-int btrfs_writepage_cow_fixup(struct folio *folio)
-{
- struct inode *inode = folio->mapping->host;
- struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- struct btrfs_writepage_fixup *fixup;
-
- /* This folio has ordered extent covering it already */
- if (folio_test_ordered(folio))
- return 0;
-
- /*
- * For experimental build, we error out instead of EAGAIN.
- *
- * We should not hit such out-of-band dirty folios anymore.
- */
- if (IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL)) {
- DEBUG_WARN();
- btrfs_err_rl(fs_info,
- "root %lld ino %llu folio %llu is marked dirty without notifying the fs",
- btrfs_root_id(BTRFS_I(inode)->root),
- btrfs_ino(BTRFS_I(inode)),
- folio_pos(folio));
- return -EUCLEAN;
- }
-
- /*
- * folio_checked is set below when we create a fixup worker for this
- * folio, don't try to create another one if we're already
- * folio_test_checked.
- *
- * The extent_io writepage code will redirty the foio if we send back
- * EAGAIN.
- */
- if (folio_test_checked(folio))
- return -EAGAIN;
-
- fixup = kzalloc_obj(*fixup, GFP_NOFS);
- if (!fixup)
- return -EAGAIN;
-
- /*
- * We are already holding a reference to this inode from
- * write_cache_pages. We need to hold it because the space reservation
- * takes place outside of the folio lock, and we can't trust
- * folio->mapping outside of the folio lock.
- */
- ihold(inode);
- btrfs_folio_set_checked(fs_info, folio, folio_pos(folio), folio_size(folio));
- folio_get(folio);
- btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL);
- fixup->folio = folio;
- fixup->inode = BTRFS_I(inode);
- btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
-
- return -EAGAIN;
-}
-
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 file_pos,
struct btrfs_file_extent_item *stack_fi,