From 966140c2f006bd478b66550e454e5a06683684f2 Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Wed, 24 Jun 2020 10:11:51 -0400
Subject: [PATCH] Fixes for 4.19

Signed-off-by: Sasha Levin
---
 ...rved-bits-related-calculation-errors.patch | 151 ++++++++++++++++++
 ...mmio_value-to-0-if-reserved-pf-can-t.patch |  69 ++++++++
 ..._set_mmio_spte_mask-from-x86.c-to-mm.patch | 126 +++++++++++++++
 ...feature-flag-md_feature_raid0_layout.patch | 115 +++++++++++++
 queue-4.19/series                             |   4 +
 5 files changed, 465 insertions(+)
 create mode 100644 queue-4.19/kvm-x86-fix-reserved-bits-related-calculation-errors.patch
 create mode 100644 queue-4.19/kvm-x86-mmu-set-mmio_value-to-0-if-reserved-pf-can-t.patch
 create mode 100644 queue-4.19/kvm-x86-move-kvm_set_mmio_spte_mask-from-x86.c-to-mm.patch
 create mode 100644 queue-4.19/md-add-feature-flag-md_feature_raid0_layout.patch

diff --git a/queue-4.19/kvm-x86-fix-reserved-bits-related-calculation-errors.patch b/queue-4.19/kvm-x86-fix-reserved-bits-related-calculation-errors.patch
new file mode 100644
index 00000000000..2d368a98766
--- /dev/null
+++ b/queue-4.19/kvm-x86-fix-reserved-bits-related-calculation-errors.patch
@@ -0,0 +1,151 @@
+From 2abacf364f2c653047761567508bced29fae1d36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 3 May 2019 03:08:53 -0700
+Subject: kvm: x86: Fix reserved bits related calculation errors caused by
+ MKTME
+
+From: Kai Huang
+
+[ Upstream commit f3ecb59dd49f1742b97df6ba071aaa3d031154ac ]
+
+Intel MKTME repurposes several high bits of physical address as 'keyID'
+for memory encryption, thus effectively reducing the platform's maximum
+physical address bits. Exactly how many bits are reduced is configured
+by BIOS. To honor such HW behavior, the repurposed bits are reduced from
+cpuinfo_x86->x86_phys_bits when MKTME is detected in CPU detection.
+Similarly, AMD SME/SEV also reduces physical address bits for memory
+encryption, and cpuinfo->x86_phys_bits is reduced too when SME/SEV is
+detected, so for both MKTME and SME/SEV, boot_cpu_data.x86_phys_bits
+doesn't hold the physical address bits reported by CPUID anymore.
+
+Currently KVM treats bits from boot_cpu_data.x86_phys_bits to 51 as
+reserved bits, but that's not true anymore for MKTME, since MKTME treats
+those reduced bits as 'keyID', not as reserved bits. Therefore
+boot_cpu_data.x86_phys_bits cannot be used to calculate reserved bits
+anymore, although we can still use it for AMD SME/SEV, since SME/SEV
+treats the reduced bits differently -- they are treated as reserved
+bits, the same as other reserved bits in a page table entry [1].
+
+Fix by introducing a new 'shadow_phys_bits' variable in the KVM x86 MMU
+code to store the effective physical bits w/o reserved bits -- for
+MKTME, it equals the physical address bits reported by CPUID, and for
+SME/SEV, it is boot_cpu_data.x86_phys_bits.
+
+Note that the physical address bits reported to the guest should remain
+unchanged -- KVM should report the physical address bits from CPUID to
+the guest, not boot_cpu_data.x86_phys_bits. For Intel MKTME there's no
+harm if the guest sets up 'keyID' bits in its page tables (since MKTME
+only works at the physical address level), and KVM doesn't even expose
+MKTME to the guest. Arguably, for AMD SME/SEV the guest is aware of SEV
+and should adjust boot_cpu_data.x86_phys_bits when it detects SEV, so KVM
+should still report the physical address bits from CPUID to the guest.
+ +Reviewed-by: Sean Christopherson +Signed-off-by: Kai Huang +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu.c | 33 +++++++++++++++++++++++++++------ + 1 file changed, 27 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 80fc5bd4a32a8..ea744cc0d3fc9 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -281,6 +281,11 @@ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; + */ + static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; + ++/* ++ * The number of non-reserved physical address bits irrespective of features ++ * that repurpose legal bits, e.g. MKTME. ++ */ ++static u8 __read_mostly shadow_phys_bits; + + static void mmu_spte_set(u64 *sptep, u64 spte); + static bool is_executable_pte(u64 spte); +@@ -464,6 +469,21 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, + } + EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); + ++static u8 kvm_get_shadow_phys_bits(void) ++{ ++ /* ++ * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected ++ * in CPU detection code, but MKTME treats those reduced bits as ++ * 'keyID' thus they are not reserved bits. Therefore for MKTME ++ * we should still return physical address bits reported by CPUID. ++ */ ++ if (!boot_cpu_has(X86_FEATURE_TME) || ++ WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008)) ++ return boot_cpu_data.x86_phys_bits; ++ ++ return cpuid_eax(0x80000008) & 0xff; ++} ++ + static void kvm_mmu_reset_all_pte_masks(void) + { + u8 low_phys_bits; +@@ -477,6 +497,8 @@ static void kvm_mmu_reset_all_pte_masks(void) + shadow_present_mask = 0; + shadow_acc_track_mask = 0; + ++ shadow_phys_bits = kvm_get_shadow_phys_bits(); ++ + /* + * If the CPU has 46 or less physical address bits, then set an + * appropriate mask to guard against L1TF attacks. Otherwise, it is +@@ -4544,7 +4566,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) + */ + shadow_zero_check = &context->shadow_zero_check; + __reset_rsvds_bits_mask(vcpu, shadow_zero_check, +- boot_cpu_data.x86_phys_bits, ++ shadow_phys_bits, + context->shadow_root_level, uses_nx, + guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), + is_pse(vcpu), true); +@@ -4581,13 +4603,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, + + if (boot_cpu_is_amd()) + __reset_rsvds_bits_mask(vcpu, shadow_zero_check, +- boot_cpu_data.x86_phys_bits, ++ shadow_phys_bits, + context->shadow_root_level, false, + boot_cpu_has(X86_FEATURE_GBPAGES), + true, true); + else + __reset_rsvds_bits_mask_ept(shadow_zero_check, +- boot_cpu_data.x86_phys_bits, ++ shadow_phys_bits, + false); + + if (!shadow_me_mask) +@@ -4608,7 +4630,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, bool execonly) + { + __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, +- boot_cpu_data.x86_phys_bits, execonly); ++ shadow_phys_bits, execonly); + } + + #define BYTE_MASK(access) \ +@@ -6053,7 +6075,6 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) + static void kvm_set_mmio_spte_mask(void) + { + u64 mask; +- int maxphyaddr = boot_cpu_data.x86_phys_bits; + + /* + * Set the reserved bits and the present bit of an paging-structure +@@ -6073,7 +6094,7 @@ static void kvm_set_mmio_spte_mask(void) + * If reserved bit is not supported, clear the present bit to disable + * mmio page fault. 
+ */ +- if (maxphyaddr == 52) ++ if (shadow_phys_bits == 52) + mask &= ~1ull; + + kvm_mmu_set_mmio_spte_mask(mask, mask); +-- +2.25.1 + diff --git a/queue-4.19/kvm-x86-mmu-set-mmio_value-to-0-if-reserved-pf-can-t.patch b/queue-4.19/kvm-x86-mmu-set-mmio_value-to-0-if-reserved-pf-can-t.patch new file mode 100644 index 00000000000..8920c294409 --- /dev/null +++ b/queue-4.19/kvm-x86-mmu-set-mmio_value-to-0-if-reserved-pf-can-t.patch @@ -0,0 +1,69 @@ +From 0ddb51c53fd07a628e30c7da5bd8f07a0fbdd041 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 27 May 2020 01:49:09 -0700 +Subject: KVM: x86/mmu: Set mmio_value to '0' if reserved #PF can't be + generated + +From: Sean Christopherson + +[ Upstream commit 6129ed877d409037b79866327102c9dc59a302fe ] + +Set the mmio_value to '0' instead of simply clearing the present bit to +squash a benign warning in kvm_mmu_set_mmio_spte_mask() that complains +about the mmio_value overlapping the lower GFN mask on systems with 52 +bits of PA space. + +Opportunistically clean up the code and comments. + +Cc: stable@vger.kernel.org +Fixes: d43e2675e96fc ("KVM: x86: only do L1TF workaround on affected processors") +Signed-off-by: Sean Christopherson +Message-Id: <20200527084909.23492-1-sean.j.christopherson@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu.c | 27 +++++++++------------------ + 1 file changed, 9 insertions(+), 18 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index ea744cc0d3fc9..0679303e5f3db 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -6077,25 +6077,16 @@ static void kvm_set_mmio_spte_mask(void) + u64 mask; + + /* +- * Set the reserved bits and the present bit of an paging-structure +- * entry to generate page fault with PFER.RSV = 1. ++ * Set a reserved PA bit in MMIO SPTEs to generate page faults with ++ * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT ++ * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports ++ * 52-bit physical addresses then there are no reserved PA bits in the ++ * PTEs and so the reserved PA approach must be disabled. + */ +- +- /* +- * Mask the uppermost physical address bit, which would be reserved as +- * long as the supported physical address width is less than 52. +- */ +- mask = 1ull << 51; +- +- /* Set the present bit. */ +- mask |= 1ull; +- +- /* +- * If reserved bit is not supported, clear the present bit to disable +- * mmio page fault. +- */ +- if (shadow_phys_bits == 52) +- mask &= ~1ull; ++ if (shadow_phys_bits < 52) ++ mask = BIT_ULL(51) | PT_PRESENT_MASK; ++ else ++ mask = 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask); + } +-- +2.25.1 + diff --git a/queue-4.19/kvm-x86-move-kvm_set_mmio_spte_mask-from-x86.c-to-mm.patch b/queue-4.19/kvm-x86-move-kvm_set_mmio_spte_mask-from-x86.c-to-mm.patch new file mode 100644 index 00000000000..bd8e372fa68 --- /dev/null +++ b/queue-4.19/kvm-x86-move-kvm_set_mmio_spte_mask-from-x86.c-to-mm.patch @@ -0,0 +1,126 @@ +From de472e447a28690b2a5e9e7ca5a216a518f8da0a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 May 2019 03:08:52 -0700 +Subject: kvm: x86: Move kvm_set_mmio_spte_mask() from x86.c to mmu.c + +From: Kai Huang + +[ Upstream commit 7b6f8a06e482960ba6ab06faba51c8f3727a5c7b ] + +As a prerequisite to fix several SPTE reserved bits related calculation +errors caused by MKTME, which requires kvm_set_mmio_spte_mask() to use +local static variable defined in mmu.c. 
+ +Also move call site of kvm_set_mmio_spte_mask() from kvm_arch_init() to +kvm_mmu_module_init() so that kvm_set_mmio_spte_mask() can be static. + +Reviewed-by: Sean Christopherson +Signed-off-by: Kai Huang +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu.c | 31 +++++++++++++++++++++++++++++++ + arch/x86/kvm/x86.c | 31 ------------------------------- + 2 files changed, 31 insertions(+), 31 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 18632f15b29f1..80fc5bd4a32a8 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -6050,6 +6050,35 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) + return 0; + } + ++static void kvm_set_mmio_spte_mask(void) ++{ ++ u64 mask; ++ int maxphyaddr = boot_cpu_data.x86_phys_bits; ++ ++ /* ++ * Set the reserved bits and the present bit of an paging-structure ++ * entry to generate page fault with PFER.RSV = 1. ++ */ ++ ++ /* ++ * Mask the uppermost physical address bit, which would be reserved as ++ * long as the supported physical address width is less than 52. ++ */ ++ mask = 1ull << 51; ++ ++ /* Set the present bit. */ ++ mask |= 1ull; ++ ++ /* ++ * If reserved bit is not supported, clear the present bit to disable ++ * mmio page fault. ++ */ ++ if (maxphyaddr == 52) ++ mask &= ~1ull; ++ ++ kvm_mmu_set_mmio_spte_mask(mask, mask); ++} ++ + int kvm_mmu_module_init(void) + { + int ret = -ENOMEM; +@@ -6059,6 +6088,8 @@ int kvm_mmu_module_init(void) + + kvm_mmu_reset_all_pte_masks(); + ++ kvm_set_mmio_spte_mask(); ++ + pte_list_desc_cache = kmem_cache_create("pte_list_desc", + sizeof(struct pte_list_desc), + 0, SLAB_ACCOUNT, NULL); +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index c53df0b953850..50d59ad34619c 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -6775,35 +6775,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { + .get_guest_ip = kvm_get_guest_ip, + }; + +-static void kvm_set_mmio_spte_mask(void) +-{ +- u64 mask; +- int maxphyaddr = boot_cpu_data.x86_phys_bits; +- +- /* +- * Set the reserved bits and the present bit of an paging-structure +- * entry to generate page fault with PFER.RSV = 1. +- */ +- +- /* +- * Mask the uppermost physical address bit, which would be reserved as +- * long as the supported physical address width is less than 52. +- */ +- mask = 1ull << 51; +- +- /* Set the present bit. */ +- mask |= 1ull; +- +- /* +- * If reserved bit is not supported, clear the present bit to disable +- * mmio page fault. 
+-	 */
+-	if (maxphyaddr == 52)
+-		mask &= ~1ull;
+-
+-	kvm_mmu_set_mmio_spte_mask(mask, mask);
+-}
+-
+ #ifdef CONFIG_X86_64
+ static void pvclock_gtod_update_fn(struct work_struct *work)
+ {
+@@ -6881,8 +6852,6 @@ int kvm_arch_init(void *opaque)
+ 	if (r)
+ 		goto out_free_percpu;
+ 
+-	kvm_set_mmio_spte_mask();
+-
+ 	kvm_x86_ops = ops;
+ 
+ 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
+-- 
+2.25.1
+
diff --git a/queue-4.19/md-add-feature-flag-md_feature_raid0_layout.patch b/queue-4.19/md-add-feature-flag-md_feature_raid0_layout.patch
new file mode 100644
index 00000000000..be9668c7ec6
--- /dev/null
+++ b/queue-4.19/md-add-feature-flag-md_feature_raid0_layout.patch
@@ -0,0 +1,115 @@
+From 3116361db3be67ffee35ef5ab9c19e134dfe88c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 9 Sep 2019 16:52:29 +1000
+Subject: md: add feature flag MD_FEATURE_RAID0_LAYOUT
+
+From: NeilBrown
+
+[ Upstream commit 33f2c35a54dfd75ad0e7e86918dcbe4de799a56c ]
+
+Due to a bug introduced in Linux 3.14 we cannot determine the
+correct layout for a multi-zone RAID0 array - there are two
+possibilities.
+
+It is possible to tell the kernel which to choose using a module
+parameter, but this can be clumsy to use. It would be best if
+the choice were recorded in the metadata.
+So add a feature flag for this purpose.
+If it is set, then the 'layout' field of the superblock is used
+to determine which layout to use.
+
+If this flag is not set, then mddev->layout gets set to -1,
+which causes the module parameter to be required.
+
+Acked-by: Guoqing Jiang
+Signed-off-by: NeilBrown
+Signed-off-by: Song Liu
+Signed-off-by: Sasha Levin
+---
+ drivers/md/md.c                | 13 +++++++++++++
+ drivers/md/raid0.c             |  3 +++
+ include/uapi/linux/raid/md_p.h |  2 ++
+ 3 files changed, 18 insertions(+)
+
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index a6db4fd267aaf..a4e7e6c025d9c 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -1182,6 +1182,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
+ 		mddev->new_layout = mddev->layout;
+ 		mddev->new_chunk_sectors = mddev->chunk_sectors;
+ 	}
++	if (mddev->level == 0)
++		mddev->layout = -1;
+ 
+ 	if (sb->state & (1<<MD_SB_CLEAN))
+ 		mddev->recovery_cp = MaxSector;
+@@ -1598,6 +1600,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
+ 			rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
+ 	}
+ 
++	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
++	    sb->level != 0)
++		return -EINVAL;
++
+ 	if (!refdev) {
+ 		ret = 1;
+ 	} else {
+@@ -1708,6 +1714,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
+ 		mddev->new_chunk_sectors = mddev->chunk_sectors;
+ 	}
+ 
++	if (mddev->level == 0 &&
++	    !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
++		mddev->layout = -1;
++
+ 	if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
+ 		set_bit(MD_HAS_JOURNAL, &mddev->flags);
+ 
+@@ -6784,6 +6794,9 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
+ 	mddev->external = 0;
+ 
+ 	mddev->layout = info->layout;
++	if (mddev->level == 0)
++		/* Cannot trust RAID0 layout info here */
++		mddev->layout = -1;
+ 	mddev->chunk_sectors = info->chunk_size >> 9;
+ 
+ 	if (mddev->persistent) {
+diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
+index efa9df2336dac..0272102b207ea 100644
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -152,6 +152,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
+ 
+ 	if (conf->nr_strip_zones == 1) {
+ 		conf->layout = RAID0_ORIG_LAYOUT;
++	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
++		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
++		conf->layout = mddev->layout;
+ 	} else if (default_layout == RAID0_ORIG_LAYOUT ||
+ 		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+ 		conf->layout = default_layout;
+diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
+index b0d15c73f6d75..1f2d8c81f0e0c 100644
+--- a/include/uapi/linux/raid/md_p.h
++++ b/include/uapi/linux/raid/md_p.h
+@@ -329,6 +329,7 @@ struct mdp_superblock_1 {
+ #define MD_FEATURE_JOURNAL		512 /* support write cache */
+ #define MD_FEATURE_PPL			1024 /* support PPL */
+ #define MD_FEATURE_MULTIPLE_PPLS	2048 /* support for multiple PPLs */
++#define MD_FEATURE_RAID0_LAYOUT	4096 /* layout is meaningful for RAID0 */
+ #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
+ 					|MD_FEATURE_RECOVERY_OFFSET	\
+ 					|MD_FEATURE_RESHAPE_ACTIVE	\
+@@ -341,6 +342,7 @@ struct mdp_superblock_1 {
+ 					|MD_FEATURE_JOURNAL		\
+ 					|MD_FEATURE_PPL			\
+ 					|MD_FEATURE_MULTIPLE_PPLS	\
++					|MD_FEATURE_RAID0_LAYOUT	\
+ 					)
+ 
+ struct r5l_payload_header {
+-- 
+2.25.1
+
diff --git a/queue-4.19/series b/queue-4.19/series
index 0012610f104..8b7ee2eaa22 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -201,3 +201,7 @@ kretprobe-prevent-triggering-kretprobe-from-within-k.patch
 sched-rt-net-use-config_preemption.patch.patch
 net-core-device_rename-use-rwsem-instead-of-a-seqcou.patch
 revert-dpaa_eth-fix-usage-as-dsa-master-try-3.patch
+md-add-feature-flag-md_feature_raid0_layout.patch
+kvm-x86-move-kvm_set_mmio_spte_mask-from-x86.c-to-mm.patch
+kvm-x86-fix-reserved-bits-related-calculation-errors.patch
+kvm-x86-mmu-set-mmio_value-to-0-if-reserved-pf-can-t.patch
-- 
2.47.3