From 4b231a6ab16ba4965740f6d470ca319a0d92f478 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 3 Sep 2014 14:33:13 -0700 Subject: [PATCH] 3.10-stable patches added patches: btrfs-fix-csum-tree-corruption-duplicate-and-outdated-checksums.patch --- ...ion-duplicate-and-outdated-checksums.patch | 106 ++++++++++++++++++ queue-3.10/series | 1 + 2 files changed, 107 insertions(+) create mode 100644 queue-3.10/btrfs-fix-csum-tree-corruption-duplicate-and-outdated-checksums.patch diff --git a/queue-3.10/btrfs-fix-csum-tree-corruption-duplicate-and-outdated-checksums.patch b/queue-3.10/btrfs-fix-csum-tree-corruption-duplicate-and-outdated-checksums.patch new file mode 100644 index 00000000000..db4b5509373 --- /dev/null +++ b/queue-3.10/btrfs-fix-csum-tree-corruption-duplicate-and-outdated-checksums.patch @@ -0,0 +1,106 @@ +From 27b9a8122ff71a8cadfbffb9c4f0694300464f3b Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Sat, 9 Aug 2014 21:22:27 +0100 +Subject: Btrfs: fix csum tree corruption, duplicate and outdated checksums + +From: Filipe Manana + +commit 27b9a8122ff71a8cadfbffb9c4f0694300464f3b upstream. + +Under rare circumstances we can end up leaving 2 versions of a checksum +for the same file extent range. + +The reason for this is that after calling btrfs_next_leaf we process +slot 0 of the leaf it returns, instead of processing the slot set in +path->slots[0]. Most of the time (by far) path->slots[0] is 0, but after +btrfs_next_leaf() releases the path and before it searches for the next +leaf, another task might cause a split of the next leaf, which migrates +some of its keys to the leaf we were processing before calling +btrfs_next_leaf(). In this case btrfs_next_leaf() returns again the +same leaf but with path->slots[0] having a slot number corresponding +to the first new key it got, that is, a slot number that didn't exist +before calling btrfs_next_leaf(), as the leaf now has more keys than +it had before. 
So we must really process the returned leaf starting at +path->slots[0] always, as it isn't always 0, and the key at slot 0 can +have an offset much lower than our search offset/bytenr. + +For example, consider the following scenario, where we have: + +sums->bytenr: 40157184, sums->len: 16384, sums end: 40173568 +four 4kb file data blocks with offsets 40157184, 40161280, 40165376, 40169472 + + Leaf N: + + slot = 0 slot = btrfs_header_nritems() - 1 + |-------------------------------------------------------------------| + | [(CSUM CSUM 39239680), size 8] ... [(CSUM CSUM 40116224), size 4] | + |-------------------------------------------------------------------| + + Leaf N + 1: + + slot = 0 slot = btrfs_header_nritems() - 1 + |--------------------------------------------------------------------| + | [(CSUM CSUM 40161280), size 32] ... [(CSUM CSUM 40615936), size 8] | + |--------------------------------------------------------------------| + +Because we are at the last slot of leaf N, we call btrfs_next_leaf() to +find the next highest key, which releases the current path and then searches +for that next key. However after releasing the path and before finding that +next key, the item at slot 0 of leaf N + 1 gets moved to leaf N, due to a call +to ctree.c:push_leaf_left() (via ctree.c:split_leaf()), and therefore +btrfs_next_leaf() will return us a path again with leaf N but with the slot +pointing to its new last key (CSUM CSUM 40161280). This new version of leaf N +is then: + + slot = 0 slot = btrfs_header_nritems() - 2 slot = btrfs_header_nritems() - 1 + |----------------------------------------------------------------------------------------------------| + | [(CSUM CSUM 39239680), size 8] ... 
[(CSUM CSUM 40116224), size 4] [(CSUM CSUM 40161280), size 32] | + |----------------------------------------------------------------------------------------------------| + +And incorrectly using slot 0 makes us set next_offset to 39239680 and we jump +into the "insert:" label, which will set tmp to: + + tmp = min((sums->len - total_bytes) >> blocksize_bits, + (next_offset - file_key.offset) >> blocksize_bits) = + min((16384 - 0) >> 12, (39239680 - 40157184) >> 12) = + min(4, (u64)-917504 = 18446744073708634112 >> 12) = 4 + +and + + ins_size = csum_size * tmp = 4 * 4 = 16 bytes. + +In other words, we insert a new csum item in the tree with key +(CSUM_OBJECTID CSUM_KEY 40157184 = sums->bytenr) that contains the checksums +for all the data (4 blocks of 4096 bytes each = sums->len). Which is wrong, +because the item with key (CSUM CSUM 40161280) (the one that was moved from +leaf N + 1 to the end of leaf N) contains the old checksums of the last 12288 +bytes of our data and won't get those old checksums removed. + +So this leaves us 2 different checksums for 3 4kb blocks of data in the tree, +and breaks the logical rule: + + Key_N+1.offset >= Key_N.offset + length_of_data_its_checksums_cover + +An obvious bad effect of this is that a subsequent csum tree lookup to get +the checksum of any of the blocks with logical offset of 40161280, 40165376 +or 40169472 (the last 3 4kb blocks of file data), will get the old checksums. 
+ +Signed-off-by: Filipe Manana +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file-item.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/file-item.c ++++ b/fs/btrfs/file-item.c +@@ -754,7 +754,7 @@ again: + found_next = 1; + if (ret != 0) + goto insert; +- slot = 0; ++ slot = path->slots[0]; + } + btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); + if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || diff --git a/queue-3.10/series b/queue-3.10/series index bfad25e502d..359004b12eb 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -36,3 +36,4 @@ x86-don-t-exclude-low-bios-area-when-allocating-address-space-for-non-pci-cards. x86_64-vsyscall-fix-warn_bad_vsyscall-log-output.patch x86-efi-enforce-config_relocatable-for-efi-boot-stub.patch hpsa-fix-bad-enomem-return-value-in-hpsa_big_passthru_ioctl.patch +btrfs-fix-csum-tree-corruption-duplicate-and-outdated-checksums.patch -- 2.47.3