From 8b99ad8cae465d87641c2b9b1ffb83f2865c5f6a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 18 Jun 2018 06:36:45 +0200 Subject: [PATCH] 4.14-stable patches added patches: btrfs-fix-scrub-to-repair-raid6-corruption.patch btrfs-make-raid6-rebuild-retry-more.patch revert-btrfs-fix-scrub-to-repair-raid6-corruption.patch --- ...fix-scrub-to-repair-raid6-corruption.patch | 127 ++++++++++++++++++ .../btrfs-make-raid6-rebuild-retry-more.patch | 94 +++++++++++++ ...fix-scrub-to-repair-raid6-corruption.patch | 68 ++++++++++ queue-4.14/series | 3 + 4 files changed, 292 insertions(+) create mode 100644 queue-4.14/btrfs-fix-scrub-to-repair-raid6-corruption.patch create mode 100644 queue-4.14/btrfs-make-raid6-rebuild-retry-more.patch create mode 100644 queue-4.14/revert-btrfs-fix-scrub-to-repair-raid6-corruption.patch diff --git a/queue-4.14/btrfs-fix-scrub-to-repair-raid6-corruption.patch b/queue-4.14/btrfs-fix-scrub-to-repair-raid6-corruption.patch new file mode 100644 index 00000000000..4405a6a567d --- /dev/null +++ b/queue-4.14/btrfs-fix-scrub-to-repair-raid6-corruption.patch @@ -0,0 +1,127 @@ +From Alexander.Levin@microsoft.com Mon Jun 18 06:35:35 2018 +From: Sasha Levin +Date: Fri, 15 Jun 2018 02:39:02 +0000 +Subject: Btrfs: fix scrub to repair raid6 corruption +To: "gregkh@linuxfoundation.org" +Cc: "ben.hutchings@codethink.co.uk" , "stable@vger.kernel.org" , Liu Bo , David Sterba , Sasha Levin +Message-ID: <20180615023842.99130-2-alexander.levin@microsoft.com> + + +From: Liu Bo + +[ Upstream commit 762221f095e3932669093466aaf4b85ed9ad2ac1 ] + +The raid6 corruption is that, +suppose that all disks can be read without problems and if the content +that was read out doesn't match its checksum, currently for raid6 +btrfs at most retries twice, + +- the 1st retry is to rebuild with all other stripes, it'll eventually + be a raid5 xor rebuild, +- if the 1st fails, the 2nd retry will deliberately fail parity p so + that it will do raid6 style rebuild, + +however, 
the chances are that another non-parity stripe content also +has something corrupted, so that the above retries are not able to +return correct content. + +We've fixed normal reads to rebuild raid6 correctly with more retries +in Patch "Btrfs: make raid6 rebuild retry more"[1], this is to fix +scrub to do exactly the same rebuild process. + +[1]: https://patchwork.kernel.org/patch/10091755/ + +Signed-off-by: Liu Bo +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/scrub.c | 44 ++++++++++++++++++++++++++++++++------------ + 1 file changed, 32 insertions(+), 12 deletions(-) + +--- a/fs/btrfs/scrub.c ++++ b/fs/btrfs/scrub.c +@@ -301,6 +301,11 @@ static void __scrub_blocked_if_needed(st + static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); + static void scrub_put_ctx(struct scrub_ctx *sctx); + ++static inline int scrub_is_page_on_raid56(struct scrub_page *page) ++{ ++ return page->recover && ++ (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); ++} + + static void scrub_pending_bio_inc(struct scrub_ctx *sctx) + { +@@ -1323,15 +1328,34 @@ nodatasum_case: + * could happen otherwise that a correct page would be + * overwritten by a bad one). 
+ */ +- for (mirror_index = 0; +- mirror_index < BTRFS_MAX_MIRRORS && +- sblocks_for_recheck[mirror_index].page_count > 0; +- mirror_index++) { ++ for (mirror_index = 0; ;mirror_index++) { + struct scrub_block *sblock_other; + + if (mirror_index == failed_mirror_index) + continue; +- sblock_other = sblocks_for_recheck + mirror_index; ++ ++ /* raid56's mirror can be more than BTRFS_MAX_MIRRORS */ ++ if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) { ++ if (mirror_index >= BTRFS_MAX_MIRRORS) ++ break; ++ if (!sblocks_for_recheck[mirror_index].page_count) ++ break; ++ ++ sblock_other = sblocks_for_recheck + mirror_index; ++ } else { ++ struct scrub_recover *r = sblock_bad->pagev[0]->recover; ++ int max_allowed = r->bbio->num_stripes - ++ r->bbio->num_tgtdevs; ++ ++ if (mirror_index >= max_allowed) ++ break; ++ if (!sblocks_for_recheck[1].page_count) ++ break; ++ ++ ASSERT(failed_mirror_index == 0); ++ sblock_other = sblocks_for_recheck + 1; ++ sblock_other->pagev[0]->mirror_num = 1 + mirror_index; ++ } + + /* build and submit the bios, check checksums */ + scrub_recheck_block(fs_info, sblock_other, 0); +@@ -1679,18 +1703,13 @@ static void scrub_bio_wait_endio(struct + complete(&ret->event); + } + +-static inline int scrub_is_page_on_raid56(struct scrub_page *page) +-{ +- return page->recover && +- (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); +-} +- + static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, + struct bio *bio, + struct scrub_page *page) + { + struct scrub_bio_ret done; + int ret; ++ int mirror_num; + + init_completion(&done.event); + done.status = 0; +@@ -1698,9 +1717,10 @@ static int scrub_submit_raid56_bio_wait( + bio->bi_private = &done; + bio->bi_end_io = scrub_bio_wait_endio; + ++ mirror_num = page->sblock->pagev[0]->mirror_num; + ret = raid56_parity_recover(fs_info, bio, page->recover->bbio, + page->recover->map_length, +- page->mirror_num, 0); ++ mirror_num, 0); + if (ret) + return ret; + diff --git 
a/queue-4.14/btrfs-make-raid6-rebuild-retry-more.patch b/queue-4.14/btrfs-make-raid6-rebuild-retry-more.patch new file mode 100644 index 00000000000..ce6d29952d0 --- /dev/null +++ b/queue-4.14/btrfs-make-raid6-rebuild-retry-more.patch @@ -0,0 +1,94 @@ +From Alexander.Levin@microsoft.com Mon Jun 18 06:35:49 2018 +From: Sasha Levin +Date: Fri, 15 Jun 2018 02:39:04 +0000 +Subject: Btrfs: make raid6 rebuild retry more +To: "gregkh@linuxfoundation.org" +Cc: "ben.hutchings@codethink.co.uk" , "stable@vger.kernel.org" , Liu Bo , David Sterba , Sasha Levin +Message-ID: <20180615023842.99130-3-alexander.levin@microsoft.com> + + +From: Liu Bo + +[ Upstream commit 8810f7517a3bc4ca2d41d022446d3f5fd6b77c09 ] + +There is a scenario that can end up with rebuild process failing to +return good content, i.e. +suppose that all disks can be read without problems and if the content +that was read out doesn't match its checksum, currently for raid6 +btrfs at most retries twice, + +- the 1st retry is to rebuild with all other stripes, it'll eventually + be a raid5 xor rebuild, +- if the 1st fails, the 2nd retry will deliberately fail parity p so + that it will do raid6 style rebuild, + +however, the chances are that another non-parity stripe content also +has something corrupted, so that the above retries are not able to +return correct content, and users will think of this as data loss. +More seriously, if the loss happens on some important internal btree +roots, it could refuse to mount. + +This extends btrfs to do more retries and each retry fails only one +stripe. Since raid6 can tolerate 2 disk failures, if there is one +more failure besides the failure on which we're recovering, this can +always work. + +The worst case is to retry as many times as the number of raid6 disks, +but given the fact that such a scenario is really rare in practice, +it's still acceptable. 
+ +Signed-off-by: Liu Bo +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/raid56.c | 18 ++++++++++++++---- + fs/btrfs/volumes.c | 9 ++++++++- + 2 files changed, 22 insertions(+), 5 deletions(-) + +--- a/fs/btrfs/raid56.c ++++ b/fs/btrfs/raid56.c +@@ -2172,11 +2172,21 @@ int raid56_parity_recover(struct btrfs_f + } + + /* +- * reconstruct from the q stripe if they are +- * asking for mirror 3 ++ * Loop retry: ++ * for 'mirror == 2', reconstruct from all other stripes. ++ * for 'mirror_num > 2', select a stripe to fail on every retry. + */ +- if (mirror_num == 3) +- rbio->failb = rbio->real_stripes - 2; ++ if (mirror_num > 2) { ++ /* ++ * 'mirror == 3' is to fail the p stripe and ++ * reconstruct from the q stripe. 'mirror > 3' is to ++ * fail a data stripe and reconstruct from p+q stripe. ++ */ ++ rbio->failb = rbio->real_stripes - (mirror_num - 1); ++ ASSERT(rbio->failb > 0); ++ if (rbio->failb <= rbio->faila) ++ rbio->failb--; ++ } + + ret = lock_stripe_add(rbio); + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -5110,7 +5110,14 @@ int btrfs_num_copies(struct btrfs_fs_inf + else if (map->type & BTRFS_BLOCK_GROUP_RAID5) + ret = 2; + else if (map->type & BTRFS_BLOCK_GROUP_RAID6) +- ret = 3; ++ /* ++ * There could be two corrupted data stripes, we need ++ * to loop retry in order to rebuild the correct data. ++ * ++ * Fail a stripe at a time on every retry except the ++ * stripe under reconstruction. 
++ */ ++ ret = map->num_stripes; + else + ret = 1; + free_extent_map(em); diff --git a/queue-4.14/revert-btrfs-fix-scrub-to-repair-raid6-corruption.patch b/queue-4.14/revert-btrfs-fix-scrub-to-repair-raid6-corruption.patch new file mode 100644 index 00000000000..efe6206c399 --- /dev/null +++ b/queue-4.14/revert-btrfs-fix-scrub-to-repair-raid6-corruption.patch @@ -0,0 +1,68 @@ +From Alexander.Levin@microsoft.com Mon Jun 18 06:35:07 2018 +From: Sasha Levin +Date: Fri, 15 Jun 2018 02:39:01 +0000 +Subject: Revert "Btrfs: fix scrub to repair raid6 corruption" +To: "gregkh@linuxfoundation.org" +Cc: "ben.hutchings@codethink.co.uk" , "stable@vger.kernel.org" , Sasha Levin +Message-ID: <20180615023842.99130-1-alexander.levin@microsoft.com> + +From: Sasha Levin + +This reverts commit d91bb7c6988bd6450284c762b33f2e1ea3fe7c97. + +This commit used an incorrect log message. + +Signed-off-by: Sasha Levin +Reported-by: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/raid56.c | 18 ++++-------------- + fs/btrfs/volumes.c | 9 +-------- + 2 files changed, 5 insertions(+), 22 deletions(-) + +--- a/fs/btrfs/raid56.c ++++ b/fs/btrfs/raid56.c +@@ -2172,21 +2172,11 @@ int raid56_parity_recover(struct btrfs_f + } + + /* +- * Loop retry: +- * for 'mirror == 2', reconstruct from all other stripes. +- * for 'mirror_num > 2', select a stripe to fail on every retry. ++ * reconstruct from the q stripe if they are ++ * asking for mirror 3 + */ +- if (mirror_num > 2) { +- /* +- * 'mirror == 3' is to fail the p stripe and +- * reconstruct from the q stripe. 'mirror > 3' is to +- * fail a data stripe and reconstruct from p+q stripe. 
+- */ +- rbio->failb = rbio->real_stripes - (mirror_num - 1); +- ASSERT(rbio->failb > 0); +- if (rbio->failb <= rbio->faila) +- rbio->failb--; +- } ++ if (mirror_num == 3) ++ rbio->failb = rbio->real_stripes - 2; + + ret = lock_stripe_add(rbio); + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -5110,14 +5110,7 @@ int btrfs_num_copies(struct btrfs_fs_inf + else if (map->type & BTRFS_BLOCK_GROUP_RAID5) + ret = 2; + else if (map->type & BTRFS_BLOCK_GROUP_RAID6) +- /* +- * There could be two corrupted data stripes, we need +- * to loop retry in order to rebuild the correct data. +- * +- * Fail a stripe at a time on every retry except the +- * stripe under reconstruction. +- */ +- ret = map->num_stripes; ++ ret = 3; + else + ret = 1; + free_extent_map(em); diff --git a/queue-4.14/series b/queue-4.14/series index e11dfd393c9..6614f203a6d 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -183,3 +183,6 @@ efi-libstub-arm64-handle-randomized-text_offset.patch arm-8753-1-decompressor-add-a-missing-parameter-to-the-addruart-macro.patch arm-8758-1-decompressor-restore-r1-and-r2-just-before-jumping-to-the-kernel.patch arm-kexec-fix-kdump-register-saving-on-panic.patch +revert-btrfs-fix-scrub-to-repair-raid6-corruption.patch +btrfs-fix-scrub-to-repair-raid6-corruption.patch +btrfs-make-raid6-rebuild-retry-more.patch -- 2.47.3