From 4849a01390dce54b129b3aea51a60bfb4e3968a4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 5 Jul 2019 13:15:58 +0200 Subject: [PATCH] 4.19-stable patches added patches: btrfs-ensure-replaced-device-doesn-t-have-pending-chunk-allocation.patch --- ...oesn-t-have-pending-chunk-allocation.patch | 120 ++++++++++++++++++ queue-4.19/series | 1 + 2 files changed, 121 insertions(+) create mode 100644 queue-4.19/btrfs-ensure-replaced-device-doesn-t-have-pending-chunk-allocation.patch diff --git a/queue-4.19/btrfs-ensure-replaced-device-doesn-t-have-pending-chunk-allocation.patch b/queue-4.19/btrfs-ensure-replaced-device-doesn-t-have-pending-chunk-allocation.patch new file mode 100644 index 00000000000..0bca80e0e19 --- /dev/null +++ b/queue-4.19/btrfs-ensure-replaced-device-doesn-t-have-pending-chunk-allocation.patch @@ -0,0 +1,120 @@ +From debd1c065d2037919a7da67baf55cc683fee09f0 Mon Sep 17 00:00:00 2001 +From: Nikolay Borisov +Date: Fri, 17 May 2019 10:44:25 +0300 +Subject: btrfs: Ensure replaced device doesn't have pending chunk allocation + +From: Nikolay Borisov + +commit debd1c065d2037919a7da67baf55cc683fee09f0 upstream. + +Recent FITRIM work, namely bbbf7243d62d ("btrfs: combine device update +operations during transaction commit") combined the way certain +operations are recoded in a transaction. As a result an ASSERT was added +in dev_replace_finish to ensure the new code works correctly. +Unfortunately I got reports that it's possible to trigger the assert, +meaning that during a device replace it's possible to have an unfinished +chunk allocation on the source device. + +This is supposed to be prevented by the fact that a transaction is +committed before finishing the replace oepration and alter acquiring the +chunk mutex. This is not sufficient since by the time the transaction is +committed and the chunk mutex acquired it's possible to allocate a chunk +depending on the workload being executed on the replaced device. This +bug has been present ever since device replace was introduced but there +was never code which checks for it. + +The correct way to fix is to ensure that there is no pending device +modification operation when the chunk mutex is acquire and if there is +repeat transaction commit. Unfortunately it's not possible to just +exclude the source device from btrfs_fs_devices::dev_alloc_list since +this causes ENOSPC to be hit in transaction commit. + +Fixing that in another way would need to add special cases to handle the +last writes and forbid new ones. The looped transaction fix is more +obvious, and can be easily backported. The runtime of dev-replace is +long so there's no noticeable delay caused by that. + +Reported-by: David Sterba +Fixes: 391cd9df81ac ("Btrfs: fix unprotected alloc list insertion during the finishing procedure of replace") +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + + +--- + fs/btrfs/dev-replace.c | 26 +++++++++++++++++--------- + fs/btrfs/volumes.c | 2 ++ + fs/btrfs/volumes.h | 5 +++++ + 3 files changed, 24 insertions(+), 9 deletions(-) + +--- a/fs/btrfs/dev-replace.c ++++ b/fs/btrfs/dev-replace.c +@@ -599,17 +599,25 @@ static int btrfs_dev_replace_finishing(s + } + btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); + +- trans = btrfs_start_transaction(root, 0); +- if (IS_ERR(trans)) { +- mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); +- return PTR_ERR(trans); ++ while (1) { ++ trans = btrfs_start_transaction(root, 0); ++ if (IS_ERR(trans)) { ++ mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); ++ return PTR_ERR(trans); ++ } ++ ret = btrfs_commit_transaction(trans); ++ WARN_ON(ret); ++ /* keep away write_all_supers() during the finishing procedure */ ++ mutex_lock(&fs_info->fs_devices->device_list_mutex); ++ mutex_lock(&fs_info->chunk_mutex); ++ if (src_device->has_pending_chunks) { ++ mutex_unlock(&root->fs_info->chunk_mutex); ++ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); ++ } else { ++ break; ++ } + } +- ret = btrfs_commit_transaction(trans); +- WARN_ON(ret); + +- /* keep away write_all_supers() during the finishing procedure */ +- mutex_lock(&fs_info->fs_devices->device_list_mutex); +- mutex_lock(&fs_info->chunk_mutex); + btrfs_dev_replace_write_lock(dev_replace); + dev_replace->replace_state = + scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -4873,6 +4873,7 @@ static int __btrfs_alloc_chunk(struct bt + for (i = 0; i < map->num_stripes; i++) { + num_bytes = map->stripes[i].dev->bytes_used + stripe_size; + btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes); ++ map->stripes[i].dev->has_pending_chunks = true; + } + + atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space); +@@ -7348,6 +7349,7 @@ void btrfs_update_commit_device_bytes_us + for (i = 0; i < map->num_stripes; i++) { + dev = map->stripes[i].dev; + dev->commit_bytes_used = dev->bytes_used; ++ dev->has_pending_chunks = false; + } + } + mutex_unlock(&fs_info->chunk_mutex); +--- a/fs/btrfs/volumes.h ++++ b/fs/btrfs/volumes.h +@@ -54,6 +54,11 @@ struct btrfs_device { + + spinlock_t io_lock ____cacheline_aligned; + int running_pending; ++ /* When true means this device has pending chunk alloc in ++ * current transaction. Protected by chunk_mutex. ++ */ ++ bool has_pending_chunks; ++ + /* regular prio bios */ + struct btrfs_pending_bios pending_bios; + /* sync bios */ diff --git a/queue-4.19/series b/queue-4.19/series index c102b416cb7..0c6654d30a6 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -59,3 +59,4 @@ drm-imx-notify-drm-core-before-sending-event-during-crtc-disable.patch drm-imx-only-send-event-on-crtc-disable-if-kept-disabled.patch ftrace-x86-remove-possible-deadlock-between-register_kprobe-and-ftrace_run_update_code.patch mm-vmscan.c-prevent-useless-kswapd-loops.patch +btrfs-ensure-replaced-device-doesn-t-have-pending-chunk-allocation.patch -- 2.47.3