From 8e12fb2725bb8ccb092a7331a9dae40fba5ecd72 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 May 2018 10:34:53 +0200 Subject: [PATCH] 4.9-stable patches added patches: btrfs-fix-reading-stale-metadata-blocks-after-degraded-raid1-mounts.patch x86-amd-don-t-set-x86_bug_sysret_ss_attrs-when-running.patch --- ...a-blocks-after-degraded-raid1-mounts.patch | 95 +++++++++++++++++++ queue-4.9/series | 2 + ...x86_bug_sysret_ss_attrs-when-running.patch | 58 +++++++++++ 3 files changed, 155 insertions(+) create mode 100644 queue-4.9/btrfs-fix-reading-stale-metadata-blocks-after-degraded-raid1-mounts.patch create mode 100644 queue-4.9/x86-amd-don-t-set-x86_bug_sysret_ss_attrs-when-running.patch diff --git a/queue-4.9/btrfs-fix-reading-stale-metadata-blocks-after-degraded-raid1-mounts.patch b/queue-4.9/btrfs-fix-reading-stale-metadata-blocks-after-degraded-raid1-mounts.patch new file mode 100644 index 00000000000..7b2523e9231 --- /dev/null +++ b/queue-4.9/btrfs-fix-reading-stale-metadata-blocks-after-degraded-raid1-mounts.patch @@ -0,0 +1,95 @@ +From 02a3307aa9c20b4f6626255b028f07f6cfa16feb Mon Sep 17 00:00:00 2001 +From: Liu Bo +Date: Wed, 16 May 2018 01:37:36 +0800 +Subject: btrfs: fix reading stale metadata blocks after degraded raid1 mounts + +From: Liu Bo + +commit 02a3307aa9c20b4f6626255b028f07f6cfa16feb upstream. + +If a btree block, aka. extent buffer, is not available in the extent +buffer cache, it'll be read out from the disk instead, i.e. + +btrfs_search_slot() + read_block_for_search() # hold parent and its lock, go to read child + btrfs_release_path() + read_tree_block() # read child + +Unfortunately, the parent lock got released before reading child, so +commit 5bdd3536cbbe ("Btrfs: Fix block generation verification race") had +used 0 as parent transid to read the child block. It forces +read_tree_block() not to check if parent transid is different from the +generation id of the child that it reads out from disk. 
+ +A simple PoC is included in btrfs/124, + +0. A two-disk raid1 btrfs, + +1. Right after mkfs.btrfs, block A is allocated to be device tree's root. + +2. Mount this filesystem and put it in use, after a while, device tree's + root got COW but block A hasn't been allocated/overwritten yet. + +3. Umount it and reload the btrfs module to remove both disks from the + global @fs_devices list. + +4. mount -odegraded dev1 and write some data, so now block A is allocated + to be a leaf in checksum tree. Note that only dev1 has the latest + metadata of this filesystem. + +5. Umount it and mount it again normally (with both disks), since raid1 + can pick up one disk by the writer task's pid, if btrfs_search_slot() + needs to read block A, dev2 which does NOT have the latest metadata + might be read for block A, then we got a stale block A. + +6. As parent transid is not checked, block A is marked as uptodate and + put into the extent buffer cache, so the future search won't bother + to read disk again, which means it'll make changes on this stale + one and make it dirty and flush it onto disk. + +To avoid the problem, parent transid needs to be passed to +read_tree_block(). + +In order to get a valid parent transid, we need to hold the parent's +lock until finishing reading child. + +This patch needs to be slightly adapted for stable kernels, the +&first_key parameter added to read_tree_block() is from 4.16+ +(581c1760415c4). The fix is to replace 0 by 'gen'. 
+ +Fixes: 5bdd3536cbbe ("Btrfs: Fix block generation verification race") +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Liu Bo +Reviewed-by: Filipe Manana +Reviewed-by: Qu Wenruo +[ update changelog ] +Signed-off-by: David Sterba +Signed-off-by: Nikolay Borisov +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -2486,10 +2486,8 @@ read_block_for_search(struct btrfs_trans + if (p->reada != READA_NONE) + reada_for_search(root, p, level, slot, key->objectid); + +- btrfs_release_path(p); +- + ret = -EAGAIN; +- tmp = read_tree_block(root, blocknr, 0); ++ tmp = read_tree_block(root, blocknr, gen); + if (!IS_ERR(tmp)) { + /* + * If the read above didn't mark this buffer up to date, +@@ -2503,6 +2501,8 @@ read_block_for_search(struct btrfs_trans + } else { + ret = PTR_ERR(tmp); + } ++ ++ btrfs_release_path(p); + return ret; + } + diff --git a/queue-4.9/series b/queue-4.9/series index 18aa731d98c..e43f9674935 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -31,3 +31,5 @@ arm-8770-1-kprobes-prohibit-probing-on-optimized_callback.patch arm-8772-1-kprobes-prohibit-kprobes-on-get_user-functions.patch btrfs-fix-xattr-loss-after-power-failure.patch btrfs-fix-crash-when-trying-to-resume-balance-without-the-resume-flag.patch +x86-amd-don-t-set-x86_bug_sysret_ss_attrs-when-running.patch +btrfs-fix-reading-stale-metadata-blocks-after-degraded-raid1-mounts.patch diff --git a/queue-4.9/x86-amd-don-t-set-x86_bug_sysret_ss_attrs-when-running.patch b/queue-4.9/x86-amd-don-t-set-x86_bug_sysret_ss_attrs-when-running.patch new file mode 100644 index 00000000000..a173c1cc6eb --- /dev/null +++ b/queue-4.9/x86-amd-don-t-set-x86_bug_sysret_ss_attrs-when-running.patch @@ -0,0 +1,58 @@ +From 35b8de6f6b9abfe9dfb44308e58cbf5461e9ceba Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Sun, 20 May 2018 20:51:10 +0100 +Subject: x86/amd: don't set 
X86_BUG_SYSRET_SS_ATTRS when running under Xen + +commit def9331a12977770cc6132d79f8e6565871e8e38 upstream. + +When running as a Xen PV guest, X86_BUG_SYSRET_SS_ATTRS must not be set +on AMD cpus. + +This bug/feature bit is kind of special as it will be used very early +when switching threads. Setting the bit and clearing it a little bit +later leaves a critical window where things can go wrong. This time +window has been enlarged a little bit by using setup_clear_cpu_cap() instead +of the hypervisor's set_cpu_features callback. It seems this larger +window now makes it rather easy to hit the problem. + +The proper solution is to never set the bit in case of Xen. + +Signed-off-by: Juergen Gross +Reviewed-by: Boris Ostrovsky +Acked-by: Thomas Gleixner +Signed-off-by: Juergen Gross +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/amd.c | 5 +++-- + arch/x86/xen/enlighten.c | 4 +--- + 2 files changed, 4 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -824,8 +824,9 @@ static void init_amd(struct cpuinfo_x86 + if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) + set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); + +- /* AMD CPUs don't reset SS attributes on SYSRET */ +- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); ++ /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ ++ if (!cpu_has(c, X86_FEATURE_XENPV)) ++ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + } + + #ifdef CONFIG_X86_32 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -1977,10 +1977,8 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); + + static void xen_set_cpu_features(struct cpuinfo_x86 *c) + { +- if (xen_pv_domain()) { +- clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); ++ if (xen_pv_domain()) + set_cpu_cap(c, X86_FEATURE_XENPV); +- } + } + + static void xen_pin_vcpu(int cpu) -- 2.47.2