git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.19
author Sasha Levin <sashal@kernel.org>
Wed, 24 Jun 2020 14:11:51 +0000 (10:11 -0400)
committer Sasha Levin <sashal@kernel.org>
Wed, 24 Jun 2020 14:11:51 +0000 (10:11 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.19/kvm-x86-fix-reserved-bits-related-calculation-errors.patch [new file with mode: 0644]
queue-4.19/kvm-x86-mmu-set-mmio_value-to-0-if-reserved-pf-can-t.patch [new file with mode: 0644]
queue-4.19/kvm-x86-move-kvm_set_mmio_spte_mask-from-x86.c-to-mm.patch [new file with mode: 0644]
queue-4.19/md-add-feature-flag-md_feature_raid0_layout.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/kvm-x86-fix-reserved-bits-related-calculation-errors.patch b/queue-4.19/kvm-x86-fix-reserved-bits-related-calculation-errors.patch
new file mode 100644 (file)
index 0000000..2d368a9
--- /dev/null
@@ -0,0 +1,151 @@
+From 2abacf364f2c653047761567508bced29fae1d36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 May 2019 03:08:53 -0700
+Subject: kvm: x86: Fix reserved bits related calculation errors caused by
+ MKTME
+
+From: Kai Huang <kai.huang@linux.intel.com>
+
+[ Upstream commit f3ecb59dd49f1742b97df6ba071aaa3d031154ac ]
+
+Intel MKTME repurposes several high bits of physical address as 'keyID'
+for memory encryption thus effectively reduces platform's maximum
+physical address bits. Exactly how many bits are reduced is configured
+by BIOS. To honor such HW behavior, the repurposed bits are reduced from
+cpuinfo_x86->x86_phys_bits when MKTME is detected in CPU detection.
+Similarly, AMD SME/SEV also reduces physical address bits for memory
+encryption, and cpuinfo->x86_phys_bits is reduced too when SME/SEV is
+detected, so for both MKTME and SME/SEV, boot_cpu_data.x86_phys_bits
+doesn't hold physical address bits reported by CPUID anymore.
+
+Currently KVM treats bits from boot_cpu_data.x86_phys_bits to 51 as
+reserved bits, but it's not true anymore for MKTME, since MKTME treats
+those reduced bits as 'keyID', but not reserved bits. Therefore
+boot_cpu_data.x86_phys_bits cannot be used to calculate reserved bits
+anymore, although we can still use it for AMD SME/SEV since SME/SEV
+treats the reduced bits differently -- they are treated as reserved
+bits, the same as other reserved bits in page table entry [1].
+
+Fix by introducing a new 'shadow_phys_bits' variable in KVM x86 MMU code
+to store the effective physical bits w/o reserved bits -- for MKTME,
+it equals to physical address reported by CPUID, and for SME/SEV, it is
+boot_cpu_data.x86_phys_bits.
+
+Note that the physical address bits reported to guest should remain
+unchanged -- KVM should report physical address reported by CPUID to
+guest, but not boot_cpu_data.x86_phys_bits. Because for Intel MKTME,
+there's no harm if guest sets up 'keyID' bits in guest page table (since
+MKTME only works at physical address level), and KVM doesn't even expose
+MKTME to guest. Arguably, for AMD SME/SEV, guest is aware of SEV thus it
+should adjust boot_cpu_data.x86_phys_bits when it detects SEV, therefore
+KVM should still report physical address reported by CPUID to guest.
+
+Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Kai Huang <kai.huang@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu.c | 33 +++++++++++++++++++++++++++------
+ 1 file changed, 27 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index 80fc5bd4a32a8..ea744cc0d3fc9 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -281,6 +281,11 @@ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
+  */
+ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
++/*
++ * The number of non-reserved physical address bits irrespective of features
++ * that repurpose legal bits, e.g. MKTME.
++ */
++static u8 __read_mostly shadow_phys_bits;
+ static void mmu_spte_set(u64 *sptep, u64 spte);
+ static bool is_executable_pte(u64 spte);
+@@ -464,6 +469,21 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
++static u8 kvm_get_shadow_phys_bits(void)
++{
++      /*
++       * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
++       * in CPU detection code, but MKTME treats those reduced bits as
++       * 'keyID' thus they are not reserved bits. Therefore for MKTME
++       * we should still return physical address bits reported by CPUID.
++       */
++      if (!boot_cpu_has(X86_FEATURE_TME) ||
++          WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
++              return boot_cpu_data.x86_phys_bits;
++
++      return cpuid_eax(0x80000008) & 0xff;
++}
++
+ static void kvm_mmu_reset_all_pte_masks(void)
+ {
+       u8 low_phys_bits;
+@@ -477,6 +497,8 @@ static void kvm_mmu_reset_all_pte_masks(void)
+       shadow_present_mask = 0;
+       shadow_acc_track_mask = 0;
++      shadow_phys_bits = kvm_get_shadow_phys_bits();
++
+       /*
+        * If the CPU has 46 or less physical address bits, then set an
+        * appropriate mask to guard against L1TF attacks. Otherwise, it is
+@@ -4544,7 +4566,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+        */
+       shadow_zero_check = &context->shadow_zero_check;
+       __reset_rsvds_bits_mask(vcpu, shadow_zero_check,
+-                              boot_cpu_data.x86_phys_bits,
++                              shadow_phys_bits,
+                               context->shadow_root_level, uses_nx,
+                               guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
+                               is_pse(vcpu), true);
+@@ -4581,13 +4603,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+       if (boot_cpu_is_amd())
+               __reset_rsvds_bits_mask(vcpu, shadow_zero_check,
+-                                      boot_cpu_data.x86_phys_bits,
++                                      shadow_phys_bits,
+                                       context->shadow_root_level, false,
+                                       boot_cpu_has(X86_FEATURE_GBPAGES),
+                                       true, true);
+       else
+               __reset_rsvds_bits_mask_ept(shadow_zero_check,
+-                                          boot_cpu_data.x86_phys_bits,
++                                          shadow_phys_bits,
+                                           false);
+       if (!shadow_me_mask)
+@@ -4608,7 +4630,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+                               struct kvm_mmu *context, bool execonly)
+ {
+       __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+-                                  boot_cpu_data.x86_phys_bits, execonly);
++                                  shadow_phys_bits, execonly);
+ }
+ #define BYTE_MASK(access) \
+@@ -6053,7 +6075,6 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
+ static void kvm_set_mmio_spte_mask(void)
+ {
+       u64 mask;
+-      int maxphyaddr = boot_cpu_data.x86_phys_bits;
+       /*
+        * Set the reserved bits and the present bit of an paging-structure
+@@ -6073,7 +6094,7 @@ static void kvm_set_mmio_spte_mask(void)
+        * If reserved bit is not supported, clear the present bit to disable
+        * mmio page fault.
+        */
+-      if (maxphyaddr == 52)
++      if (shadow_phys_bits == 52)
+               mask &= ~1ull;
+       kvm_mmu_set_mmio_spte_mask(mask, mask);
+-- 
+2.25.1
+
diff --git a/queue-4.19/kvm-x86-mmu-set-mmio_value-to-0-if-reserved-pf-can-t.patch b/queue-4.19/kvm-x86-mmu-set-mmio_value-to-0-if-reserved-pf-can-t.patch
new file mode 100644 (file)
index 0000000..8920c29
--- /dev/null
@@ -0,0 +1,69 @@
+From 0ddb51c53fd07a628e30c7da5bd8f07a0fbdd041 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 May 2020 01:49:09 -0700
+Subject: KVM: x86/mmu: Set mmio_value to '0' if reserved #PF can't be
+ generated
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+[ Upstream commit 6129ed877d409037b79866327102c9dc59a302fe ]
+
+Set the mmio_value to '0' instead of simply clearing the present bit to
+squash a benign warning in kvm_mmu_set_mmio_spte_mask() that complains
+about the mmio_value overlapping the lower GFN mask on systems with 52
+bits of PA space.
+
+Opportunistically clean up the code and comments.
+
+Cc: stable@vger.kernel.org
+Fixes: d43e2675e96fc ("KVM: x86: only do L1TF workaround on affected processors")
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Message-Id: <20200527084909.23492-1-sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu.c | 27 +++++++++------------------
+ 1 file changed, 9 insertions(+), 18 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index ea744cc0d3fc9..0679303e5f3db 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -6077,25 +6077,16 @@ static void kvm_set_mmio_spte_mask(void)
+       u64 mask;
+       /*
+-       * Set the reserved bits and the present bit of an paging-structure
+-       * entry to generate page fault with PFER.RSV = 1.
++       * Set a reserved PA bit in MMIO SPTEs to generate page faults with
++       * PFEC.RSVD=1 on MMIO accesses.  64-bit PTEs (PAE, x86-64, and EPT
++       * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports
++       * 52-bit physical addresses then there are no reserved PA bits in the
++       * PTEs and so the reserved PA approach must be disabled.
+        */
+-
+-      /*
+-       * Mask the uppermost physical address bit, which would be reserved as
+-       * long as the supported physical address width is less than 52.
+-       */
+-      mask = 1ull << 51;
+-
+-      /* Set the present bit. */
+-      mask |= 1ull;
+-
+-      /*
+-       * If reserved bit is not supported, clear the present bit to disable
+-       * mmio page fault.
+-       */
+-      if (shadow_phys_bits == 52)
+-              mask &= ~1ull;
++      if (shadow_phys_bits < 52)
++              mask = BIT_ULL(51) | PT_PRESENT_MASK;
++      else
++              mask = 0;
+       kvm_mmu_set_mmio_spte_mask(mask, mask);
+ }
+-- 
+2.25.1
+
diff --git a/queue-4.19/kvm-x86-move-kvm_set_mmio_spte_mask-from-x86.c-to-mm.patch b/queue-4.19/kvm-x86-move-kvm_set_mmio_spte_mask-from-x86.c-to-mm.patch
new file mode 100644 (file)
index 0000000..bd8e372
--- /dev/null
@@ -0,0 +1,126 @@
+From de472e447a28690b2a5e9e7ca5a216a518f8da0a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 May 2019 03:08:52 -0700
+Subject: kvm: x86: Move kvm_set_mmio_spte_mask() from x86.c to mmu.c
+
+From: Kai Huang <kai.huang@linux.intel.com>
+
+[ Upstream commit 7b6f8a06e482960ba6ab06faba51c8f3727a5c7b ]
+
+As a prerequisite to fix several SPTE reserved bits related calculation
+errors caused by MKTME, which requires kvm_set_mmio_spte_mask() to use
+local static variable defined in mmu.c.
+
+Also move call site of kvm_set_mmio_spte_mask() from kvm_arch_init() to
+kvm_mmu_module_init() so that kvm_set_mmio_spte_mask() can be static.
+
+Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Kai Huang <kai.huang@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu.c | 31 +++++++++++++++++++++++++++++++
+ arch/x86/kvm/x86.c | 31 -------------------------------
+ 2 files changed, 31 insertions(+), 31 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index 18632f15b29f1..80fc5bd4a32a8 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -6050,6 +6050,35 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
+       return 0;
+ }
++static void kvm_set_mmio_spte_mask(void)
++{
++      u64 mask;
++      int maxphyaddr = boot_cpu_data.x86_phys_bits;
++
++      /*
++       * Set the reserved bits and the present bit of an paging-structure
++       * entry to generate page fault with PFER.RSV = 1.
++       */
++
++      /*
++       * Mask the uppermost physical address bit, which would be reserved as
++       * long as the supported physical address width is less than 52.
++       */
++      mask = 1ull << 51;
++
++      /* Set the present bit. */
++      mask |= 1ull;
++
++      /*
++       * If reserved bit is not supported, clear the present bit to disable
++       * mmio page fault.
++       */
++      if (maxphyaddr == 52)
++              mask &= ~1ull;
++
++      kvm_mmu_set_mmio_spte_mask(mask, mask);
++}
++
+ int kvm_mmu_module_init(void)
+ {
+       int ret = -ENOMEM;
+@@ -6059,6 +6088,8 @@ int kvm_mmu_module_init(void)
+       kvm_mmu_reset_all_pte_masks();
++      kvm_set_mmio_spte_mask();
++
+       pte_list_desc_cache = kmem_cache_create("pte_list_desc",
+                                           sizeof(struct pte_list_desc),
+                                           0, SLAB_ACCOUNT, NULL);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index c53df0b953850..50d59ad34619c 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -6775,35 +6775,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
+       .get_guest_ip           = kvm_get_guest_ip,
+ };
+-static void kvm_set_mmio_spte_mask(void)
+-{
+-      u64 mask;
+-      int maxphyaddr = boot_cpu_data.x86_phys_bits;
+-
+-      /*
+-       * Set the reserved bits and the present bit of an paging-structure
+-       * entry to generate page fault with PFER.RSV = 1.
+-       */
+-
+-      /*
+-       * Mask the uppermost physical address bit, which would be reserved as
+-       * long as the supported physical address width is less than 52.
+-       */
+-      mask = 1ull << 51;
+-
+-      /* Set the present bit. */
+-      mask |= 1ull;
+-
+-      /*
+-       * If reserved bit is not supported, clear the present bit to disable
+-       * mmio page fault.
+-       */
+-      if (maxphyaddr == 52)
+-              mask &= ~1ull;
+-
+-      kvm_mmu_set_mmio_spte_mask(mask, mask);
+-}
+-
+ #ifdef CONFIG_X86_64
+ static void pvclock_gtod_update_fn(struct work_struct *work)
+ {
+@@ -6881,8 +6852,6 @@ int kvm_arch_init(void *opaque)
+       if (r)
+               goto out_free_percpu;
+-      kvm_set_mmio_spte_mask();
+-
+       kvm_x86_ops = ops;
+       kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
+-- 
+2.25.1
+
diff --git a/queue-4.19/md-add-feature-flag-md_feature_raid0_layout.patch b/queue-4.19/md-add-feature-flag-md_feature_raid0_layout.patch
new file mode 100644 (file)
index 0000000..be9668c
--- /dev/null
@@ -0,0 +1,115 @@
+From 3116361db3be67ffee35ef5ab9c19e134dfe88c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Sep 2019 16:52:29 +1000
+Subject: md: add feature flag MD_FEATURE_RAID0_LAYOUT
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 33f2c35a54dfd75ad0e7e86918dcbe4de799a56c ]
+
+Due to a bug introduced in Linux 3.14 we cannot determine the
+correct layout for a multi-zone RAID0 array - there are two
+possibilities.
+
+It is possible to tell the kernel which to choose using a module
+parameter, but this can be clumsy to use.  It would be best if
+the choice were recorded in the metadata.
+So add a feature flag for this purpose.
+If it is set, then the 'layout' field of the superblock is used
+to determine which layout to use.
+
+If this flag is not set, then mddev->layout gets set to -1,
+which causes the module parameter to be required.
+
+Acked-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Song Liu <songliubraving@fb.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/md.c                | 13 +++++++++++++
+ drivers/md/raid0.c             |  3 +++
+ include/uapi/linux/raid/md_p.h |  2 ++
+ 3 files changed, 18 insertions(+)
+
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index a6db4fd267aaf..a4e7e6c025d9c 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -1182,6 +1182,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
+                       mddev->new_layout = mddev->layout;
+                       mddev->new_chunk_sectors = mddev->chunk_sectors;
+               }
++              if (mddev->level == 0)
++                      mddev->layout = -1;
+               if (sb->state & (1<<MD_SB_CLEAN))
+                       mddev->recovery_cp = MaxSector;
+@@ -1598,6 +1600,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
+               rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
+       }
++      if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
++          sb->level != 0)
++              return -EINVAL;
++
+       if (!refdev) {
+               ret = 1;
+       } else {
+@@ -1708,6 +1714,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
+                       mddev->new_chunk_sectors = mddev->chunk_sectors;
+               }
++              if (mddev->level == 0 &&
++                  !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
++                      mddev->layout = -1;
++
+               if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
+                       set_bit(MD_HAS_JOURNAL, &mddev->flags);
+@@ -6784,6 +6794,9 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
+       mddev->external      = 0;
+       mddev->layout        = info->layout;
++      if (mddev->level == 0)
++              /* Cannot trust RAID0 layout info here */
++              mddev->layout = -1;
+       mddev->chunk_sectors = info->chunk_size >> 9;
+       if (mddev->persistent) {
+diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
+index efa9df2336dac..0272102b207ea 100644
+--- a/drivers/md/raid0.c
++++ b/drivers/md/raid0.c
+@@ -152,6 +152,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
+       if (conf->nr_strip_zones == 1) {
+               conf->layout = RAID0_ORIG_LAYOUT;
++      } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
++                 mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
++              conf->layout = mddev->layout;
+       } else if (default_layout == RAID0_ORIG_LAYOUT ||
+                  default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+               conf->layout = default_layout;
+diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
+index b0d15c73f6d75..1f2d8c81f0e0c 100644
+--- a/include/uapi/linux/raid/md_p.h
++++ b/include/uapi/linux/raid/md_p.h
+@@ -329,6 +329,7 @@ struct mdp_superblock_1 {
+ #define       MD_FEATURE_JOURNAL              512 /* support write cache */
+ #define       MD_FEATURE_PPL                  1024 /* support PPL */
+ #define       MD_FEATURE_MULTIPLE_PPLS        2048 /* support for multiple PPLs */
++#define       MD_FEATURE_RAID0_LAYOUT         4096 /* layout is meaningful for RAID0 */
+ #define       MD_FEATURE_ALL                  (MD_FEATURE_BITMAP_OFFSET       \
+                                       |MD_FEATURE_RECOVERY_OFFSET     \
+                                       |MD_FEATURE_RESHAPE_ACTIVE      \
+@@ -341,6 +342,7 @@ struct mdp_superblock_1 {
+                                       |MD_FEATURE_JOURNAL             \
+                                       |MD_FEATURE_PPL                 \
+                                       |MD_FEATURE_MULTIPLE_PPLS       \
++                                      |MD_FEATURE_RAID0_LAYOUT        \
+                                       )
+ struct r5l_payload_header {
+-- 
+2.25.1
+
index 0012610f10448512621724f84040ce31f484fbfd..8b7ee2eaa22442e9c0c5265170c45641c782d25a 100644 (file)
@@ -201,3 +201,7 @@ kretprobe-prevent-triggering-kretprobe-from-within-k.patch
 sched-rt-net-use-config_preemption.patch.patch
 net-core-device_rename-use-rwsem-instead-of-a-seqcou.patch
 revert-dpaa_eth-fix-usage-as-dsa-master-try-3.patch
+md-add-feature-flag-md_feature_raid0_layout.patch
+kvm-x86-move-kvm_set_mmio_spte_mask-from-x86.c-to-mm.patch
+kvm-x86-fix-reserved-bits-related-calculation-errors.patch
+kvm-x86-mmu-set-mmio_value-to-0-if-reserved-pf-can-t.patch