git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 25 Apr 2022 11:34:12 +0000 (13:34 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 25 Apr 2022 11:34:12 +0000 (13:34 +0200)
added patches:
arm_pmu-validate-single-group-leader-events.patch
kvm-nvmx-defer-apicv-updates-while-l2-is-active-until-l1-is-active.patch
kvm-svm-flush-when-freeing-encrypted-pages-even-on-sme_coherent-cpus.patch
kvm-x86-pend-kvm_req_apicv_update-during-vcpu-creation-to-fix-a-race.patch
kvm-x86-pmu-update-amd-pmc-sample-period-to-fix-guest-nmi-watchdog.patch

queue-5.15/arm_pmu-validate-single-group-leader-events.patch [new file with mode: 0644]
queue-5.15/kvm-nvmx-defer-apicv-updates-while-l2-is-active-until-l1-is-active.patch [new file with mode: 0644]
queue-5.15/kvm-svm-flush-when-freeing-encrypted-pages-even-on-sme_coherent-cpus.patch [new file with mode: 0644]
queue-5.15/kvm-x86-pend-kvm_req_apicv_update-during-vcpu-creation-to-fix-a-race.patch [new file with mode: 0644]
queue-5.15/kvm-x86-pmu-update-amd-pmc-sample-period-to-fix-guest-nmi-watchdog.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/arm_pmu-validate-single-group-leader-events.patch b/queue-5.15/arm_pmu-validate-single-group-leader-events.patch
new file mode 100644 (file)
index 0000000..06f8d19
--- /dev/null
@@ -0,0 +1,54 @@
+From e5c23779f93d45e39a52758ca593bd7e62e9b4be Mon Sep 17 00:00:00 2001
+From: Rob Herring <robh@kernel.org>
+Date: Fri, 8 Apr 2022 15:33:30 -0500
+Subject: arm_pmu: Validate single/group leader events
+
+From: Rob Herring <robh@kernel.org>
+
+commit e5c23779f93d45e39a52758ca593bd7e62e9b4be upstream.
+
+In the case where there is only a cycle counter available (i.e.
+PMCR_EL0.N is 0) and an event other than CPU cycles is opened, the open
+should fail as the event can never possibly be scheduled. However,
+event validation is skipped when the event being opened is itself the
+group leader. Fix this by always validating the group leader event as
+well.
+
+Reported-by: Al Grant <al.grant@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Rob Herring <robh@kernel.org>
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Link: https://lore.kernel.org/r/20220408203330.4014015-1-robh@kernel.org
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/perf/arm_pmu.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/drivers/perf/arm_pmu.c
++++ b/drivers/perf/arm_pmu.c
+@@ -398,6 +398,9 @@ validate_group(struct perf_event *event)
+       if (!validate_event(event->pmu, &fake_pmu, leader))
+               return -EINVAL;
++      if (event == leader)
++              return 0;
++
+       for_each_sibling_event(sibling, leader) {
+               if (!validate_event(event->pmu, &fake_pmu, sibling))
+                       return -EINVAL;
+@@ -487,12 +490,7 @@ __hw_perf_event_init(struct perf_event *
+               local64_set(&hwc->period_left, hwc->sample_period);
+       }
+-      if (event->group_leader != event) {
+-              if (validate_group(event) != 0)
+-                      return -EINVAL;
+-      }
+-
+-      return 0;
++      return validate_group(event);
+ }
+ static int armpmu_event_init(struct perf_event *event)
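For context (outside the patch itself), a minimal userspace sketch of the scenario the changelog describes, assuming an arm64 machine whose PMU advertises only the cycle counter (PMCR_EL0.N == 0); before this fix the open below would succeed even though the event could never be scheduled, afterwards it fails with EINVAL:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;   /* anything but CPU cycles */

	/* group_fd == -1: this event is its own group leader, so before the
	 * fix validate_group() was never called for it. */
	long fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");  /* expected: EINVAL with the fix */
	else
		close(fd);
	return 0;
}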
diff --git a/queue-5.15/kvm-nvmx-defer-apicv-updates-while-l2-is-active-until-l1-is-active.patch b/queue-5.15/kvm-nvmx-defer-apicv-updates-while-l2-is-active-until-l1-is-active.patch
new file mode 100644 (file)
index 0000000..315b3d8
--- /dev/null
@@ -0,0 +1,93 @@
+From 7c69661e225cc484fbf44a0b99b56714a5241ae3 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 20 Apr 2022 01:37:30 +0000
+Subject: KVM: nVMX: Defer APICv updates while L2 is active until L1 is active
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 7c69661e225cc484fbf44a0b99b56714a5241ae3 upstream.
+
+Defer APICv updates that occur while L2 is active until nested VM-Exit,
+i.e. until L1 regains control.  vmx_refresh_apicv_exec_ctrl() assumes L1
+is active and (a) stomps all over vmcs02 and (b) neglects to ever update
+vmcs01.  E.g. if vmcs12 doesn't enable the TPR shadow for L2 (and thus no
+APICv controls), L1 performs nested VM-Enter APICv inhibited, and APICv
+becomes uninhibited while L2 is active, KVM will set various APICv controls
+in vmcs02 and trigger a failed VM-Entry.  The kicker is that, unless
+running with nested_early_check=1, KVM blames L1 and chaos ensues.
+
+In all cases, ignoring vmcs02 and always deferring the inhibition change
+to vmcs01 is correct (or at least acceptable).  The ABSENT and DISABLE
+inhibitions cannot truly change while L2 is active (see below).
+
+IRQ_BLOCKING can change, but it is firmly a best effort debug feature.
+Furthermore, only L2's APIC is accelerated/virtualized to the full extent
+possible, e.g. even if L1 passes through its APIC to L2, normal MMIO/MSR
+interception will apply to the virtual APIC managed by KVM.
+The exception is the SELF_IPI register when x2APIC is enabled, but that's
+an acceptable hole.
+
+Lastly, Hyper-V's Auto EOI can technically be toggled if L1 exposes the
+MSRs to L2, but for that to work in any sane capacity, L1 would need to
+pass through IRQs to L2 as well, and IRQs must be intercepted to enable
+virtual interrupt delivery.  I.e. exposing Auto EOI to L2 and enabling
+VID for L2 are, for all intents and purposes, mutually exclusive.
+
+Lack of dynamic toggling is also why this scenario is all but impossible
+to encounter in KVM's current form.  But a future patch will pend an
+APICv update request _during_ vCPU creation to plug a race where a vCPU
+that's being created doesn't get included in the "all vCPUs request"
+because it's not yet visible to other vCPUs.  If userspace restores L2
+after VM creation (hello, KVM selftests), the first KVM_RUN will occur
+while L2 is active and thus service the APICv update request made during
+VM creation.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220420013732.3308816-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c |    5 +++++
+ arch/x86/kvm/vmx/vmx.c    |    5 +++++
+ arch/x86/kvm/vmx/vmx.h    |    1 +
+ 3 files changed, 11 insertions(+)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -4601,6 +4601,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *
+               kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+       }
++      if (vmx->nested.update_vmcs01_apicv_status) {
++              vmx->nested.update_vmcs01_apicv_status = false;
++              kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
++      }
++
+       if ((vm_exit_reason != -1) &&
+           (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
+               vmx->nested.need_vmcs12_to_shadow_sync = true;
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -4098,6 +4098,11 @@ static void vmx_refresh_apicv_exec_ctrl(
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
++      if (is_guest_mode(vcpu)) {
++              vmx->nested.update_vmcs01_apicv_status = true;
++              return;
++      }
++
+       pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
+       if (cpu_has_secondary_exec_ctrls()) {
+               if (kvm_vcpu_apicv_active(vcpu))
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -164,6 +164,7 @@ struct nested_vmx {
+       bool change_vmcs01_virtual_apic_mode;
+       bool reload_vmcs01_apic_access_page;
+       bool update_vmcs01_cpu_dirty_logging;
++      bool update_vmcs01_apicv_status;
+       /*
+        * Enlightened VMCS has been enabled. It does not mean that L1 has to
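The deferral the patch introduces follows a simple record-and-replay pattern. Below is a self-contained, userspace-style sketch of that pattern; the struct and functions are invented for illustration only, with comments naming the real KVM counterparts:

#include <stdbool.h>
#include <stdio.h>

struct nested_state {
	bool l2_active;                     /* is_guest_mode(vcpu)           */
	bool update_vmcs01_apicv_status;    /* the flag the patch adds       */
};

/* vmx_refresh_apicv_exec_ctrl() analogue */
static void refresh_apicv(struct nested_state *n)
{
	if (n->l2_active) {
		/* L2 owns the CPU: remember the change, touch neither VMCS */
		n->update_vmcs01_apicv_status = true;
		return;
	}
	printf("refreshing APICv controls in vmcs01\n");
}

/* nested_vmx_vmexit() analogue: L1 regains control */
static void nested_vmexit(struct nested_state *n)
{
	n->l2_active = false;
	if (n->update_vmcs01_apicv_status) {
		n->update_vmcs01_apicv_status = false;
		refresh_apicv(n);   /* kvm_make_request(KVM_REQ_APICV_UPDATE) */
	}
}

int main(void)
{
	struct nested_state n = { .l2_active = true };

	refresh_apicv(&n);   /* inhibit toggled while L2 runs: deferred      */
	nested_vmexit(&n);   /* replayed once L1 is active again             */
	return 0;
}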
diff --git a/queue-5.15/kvm-svm-flush-when-freeing-encrypted-pages-even-on-sme_coherent-cpus.patch b/queue-5.15/kvm-svm-flush-when-freeing-encrypted-pages-even-on-sme_coherent-cpus.patch
new file mode 100644 (file)
index 0000000..e095881
--- /dev/null
@@ -0,0 +1,46 @@
+From d45829b351ee6ec5f54dd55e6aca1f44fe239fe6 Mon Sep 17 00:00:00 2001
+From: Mingwei Zhang <mizhang@google.com>
+Date: Thu, 21 Apr 2022 03:14:06 +0000
+Subject: KVM: SVM: Flush when freeing encrypted pages even on SME_COHERENT CPUs
+
+From: Mingwei Zhang <mizhang@google.com>
+
+commit d45829b351ee6ec5f54dd55e6aca1f44fe239fe6 upstream.
+
+Use clflush_cache_range() to flush the confidential memory when
+SME_COHERENT is supported on the AMD CPU. A cache flush is still needed
+since SME_COHERENT only provides cache coherency on the CPU side; all
+confidential cache lines remain incoherent with DMA devices.
+
+Cc: stable@vger.kernel.org
+
+Fixes: add5e2f04541 ("KVM: SVM: Add support for the SEV-ES VMSA")
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Mingwei Zhang <mizhang@google.com>
+Message-Id: <20220421031407.2516575-3-mizhang@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -1990,11 +1990,14 @@ static void sev_flush_guest_memory(struc
+                                  unsigned long len)
+ {
+       /*
+-       * If hardware enforced cache coherency for encrypted mappings of the
+-       * same physical page is supported, nothing to do.
++       * If CPU enforced cache coherency for encrypted mappings of the
++       * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache
++       * flush is still needed in order to work properly with DMA devices.
+        */
+-      if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
++      if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
++              clflush_cache_range(va, PAGE_SIZE);
+               return;
++      }
+       /*
+        * If the VM Page Flush MSR is supported, use it to flush the page
diff --git a/queue-5.15/kvm-x86-pend-kvm_req_apicv_update-during-vcpu-creation-to-fix-a-race.patch b/queue-5.15/kvm-x86-pend-kvm_req_apicv_update-during-vcpu-creation-to-fix-a-race.patch
new file mode 100644 (file)
index 0000000..68b3189
--- /dev/null
@@ -0,0 +1,96 @@
+From 423ecfea77dda83823c71b0fad1c2ddb2af1e5fc Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 20 Apr 2022 01:37:31 +0000
+Subject: KVM: x86: Pend KVM_REQ_APICV_UPDATE during vCPU creation to fix a race
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 423ecfea77dda83823c71b0fad1c2ddb2af1e5fc upstream.
+
+Make a KVM_REQ_APICV_UPDATE request when creating a vCPU with an
+in-kernel local APIC and APICv enabled at the module level.  Consuming
+kvm_apicv_activated() and stuffing vcpu->arch.apicv_active directly can
+race with __kvm_set_or_clear_apicv_inhibit(), as vCPU creation happens
+before the vCPU is fully onlined, i.e. it won't get the request made to
+"all" vCPUs.  If APICv is globally inhibited between setting apicv_active
+and onlining the vCPU, the vCPU will end up running with APICv enabled
+and trigger KVM's sanity check.
+
+Mark APICv as active during vCPU creation if APICv is enabled at the
+module level, both to be optimistic about its final state, e.g. to avoid
+additional VMWRITEs on VMX, and because there are likely bugs lurking
+since KVM checks apicv_active in multiple vCPU creation paths.  While
+keeping the current behavior of consuming kvm_apicv_activated() is
+arguably safer from a regression perspective, force apicv_active so that
+vCPU creation runs with deterministic state and so that if there are bugs,
+they are found sooner than later, i.e. not when some crazy race condition
+is hit.
+
+  WARNING: CPU: 0 PID: 484 at arch/x86/kvm/x86.c:9877 vcpu_enter_guest+0x2ae3/0x3ee0 arch/x86/kvm/x86.c:9877
+  Modules linked in:
+  CPU: 0 PID: 484 Comm: syz-executor361 Not tainted 5.16.13 #2
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1~cloud0 04/01/2014
+  RIP: 0010:vcpu_enter_guest+0x2ae3/0x3ee0 arch/x86/kvm/x86.c:9877
+  Call Trace:
+   <TASK>
+   vcpu_run arch/x86/kvm/x86.c:10039 [inline]
+   kvm_arch_vcpu_ioctl_run+0x337/0x15e0 arch/x86/kvm/x86.c:10234
+   kvm_vcpu_ioctl+0x4d2/0xc80 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3727
+   vfs_ioctl fs/ioctl.c:51 [inline]
+   __do_sys_ioctl fs/ioctl.c:874 [inline]
+   __se_sys_ioctl fs/ioctl.c:860 [inline]
+   __x64_sys_ioctl+0x16d/0x1d0 fs/ioctl.c:860
+   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+   do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+The bug was hit by a syzkaller spamming VM creation with 2 vCPUs and a
+call to KVM_SET_GUEST_DEBUG.
+
+  r0 = openat$kvm(0xffffffffffffff9c, &(0x7f0000000000), 0x0, 0x0)
+  r1 = ioctl$KVM_CREATE_VM(r0, 0xae01, 0x0)
+  ioctl$KVM_CAP_SPLIT_IRQCHIP(r1, 0x4068aea3, &(0x7f0000000000)) (async)
+  r2 = ioctl$KVM_CREATE_VCPU(r1, 0xae41, 0x0) (async)
+  r3 = ioctl$KVM_CREATE_VCPU(r1, 0xae41, 0x400000000000002)
+  ioctl$KVM_SET_GUEST_DEBUG(r3, 0x4048ae9b, &(0x7f00000000c0)={0x5dda9c14aa95f5c5})
+  ioctl$KVM_RUN(r2, 0xae80, 0x0)
+
+Reported-by: Gaoning Pan <pgn@zju.edu.cn>
+Reported-by: Yongkang Jia <kangel@zju.edu.cn>
+Fixes: 8df14af42f00 ("kvm: x86: Add support for dynamic APICv activation")
+Cc: stable@vger.kernel.org
+Cc: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20220420013732.3308816-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |   15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -10813,8 +10813,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu
+               r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+               if (r < 0)
+                       goto fail_mmu_destroy;
+-              if (kvm_apicv_activated(vcpu->kvm))
++
++              /*
++               * Defer evaluating inhibits until the vCPU is first run, as
++               * this vCPU will not get notified of any changes until this
++               * vCPU is visible to other vCPUs (marked online and added to
++               * the set of vCPUs).  Opportunistically mark APICv active as
++               * VMX in particular is highly unlikely to have inhibits.
++               * Ignore the current per-VM APICv state so that vCPU creation
++               * is guaranteed to run with a deterministic value, the request
++               * will ensure the vCPU gets the correct state before VM-Entry.
++               */
++              if (enable_apicv) {
+                       vcpu->arch.apicv_active = true;
++                      kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
++              }
+       } else
+               static_branch_inc(&kvm_has_noapic_vcpu);
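For readers unfamiliar with syzlang, here is a rough C rendering of the repro above, for illustration only: syzkaller issues the marked calls asynchronously, so vCPU creation really races with the guest-debug ioctl; the KVM_CAP_SPLIT_IRQCHIP argument and the guest-debug flags are guesses at the relevant bits of the opaque payloads, and error handling is omitted.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm  = ioctl(kvm, KVM_CREATE_VM, 0);

	/* ioctl$KVM_CAP_SPLIT_IRQCHIP: args[0] (IOAPIC routes) is a guess */
	struct kvm_enable_cap cap = { .cap = KVM_CAP_SPLIT_IRQCHIP, .args = { 24 } };
	ioctl(vm, KVM_ENABLE_CAP, &cap);

	/* Two vCPUs; in the repro the first creation runs asynchronously and
	 * races with the guest-debug ioctl issued against the second one. */
	int vcpu0 = ioctl(vm, KVM_CREATE_VCPU, 0);
	int vcpu1 = ioctl(vm, KVM_CREATE_VCPU, 2);

	/* The guest-debug ioctl changes the VM-wide APICv inhibit state; a
	 * vCPU still being created misses the "all vCPUs" update request. */
	struct kvm_guest_debug dbg;
	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_BLOCKIRQ;
	ioctl(vcpu1, KVM_SET_GUEST_DEBUG, &dbg);

	/* Before the fix, this KVM_RUN could enter the guest with APICv still
	 * marked active and trip the WARN quoted above. */
	ioctl(vcpu0, KVM_RUN, 0);

	close(vcpu0); close(vcpu1); close(vm); close(kvm);
	return 0;
}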
diff --git a/queue-5.15/kvm-x86-pmu-update-amd-pmc-sample-period-to-fix-guest-nmi-watchdog.patch b/queue-5.15/kvm-x86-pmu-update-amd-pmc-sample-period-to-fix-guest-nmi-watchdog.patch
new file mode 100644 (file)
index 0000000..c06ee8b
--- /dev/null
@@ -0,0 +1,87 @@
+From 75189d1de1b377e580ebd2d2c55914631eac9c64 Mon Sep 17 00:00:00 2001
+From: Like Xu <likexu@tencent.com>
+Date: Sat, 9 Apr 2022 09:52:26 +0800
+Subject: KVM: x86/pmu: Update AMD PMC sample period to fix guest NMI-watchdog
+
+From: Like Xu <likexu@tencent.com>
+
+commit 75189d1de1b377e580ebd2d2c55914631eac9c64 upstream.
+
+The NMI-watchdog is one of the favorite features of kernel developers,
+but it does not work in an AMD guest even with vPMU enabled and, worse,
+the system misrepresents this capability via /proc.
+
+This is a PMC emulation error. KVM does not pass the latest valid
+value to the perf_event in time when the guest NMI-watchdog is running,
+so the perf_event corresponding to the watchdog counter reverts to its
+old state at some point after the first guest NMI injection, forcing
+the hardware register PMC0 to be constantly written to 0x800000000001.
+
+Meanwhile, the running counter should accurately reflect its new value
+based on the latest coordinated pmc->counter (from vPMC's point of view)
+rather than the value written directly by the guest.
+
+Fixes: 168d918f2643 ("KVM: x86: Adjust counter sample period after a wrmsr")
+Reported-by: Dongli Cao <caodongli@kingsoft.com>
+Signed-off-by: Like Xu <likexu@tencent.com>
+Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
+Tested-by: Yanan Wang <wangyanan55@huawei.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Message-Id: <20220409015226.38619-1-likexu@tencent.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/pmu.h           |    9 +++++++++
+ arch/x86/kvm/svm/pmu.c       |    1 +
+ arch/x86/kvm/vmx/pmu_intel.c |    8 ++------
+ 3 files changed, 12 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/pmu.h
++++ b/arch/x86/kvm/pmu.h
+@@ -141,6 +141,15 @@ static inline u64 get_sample_period(stru
+       return sample_period;
+ }
++static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
++{
++      if (!pmc->perf_event || pmc->is_paused)
++              return;
++
++      perf_event_period(pmc->perf_event,
++                        get_sample_period(pmc, pmc->counter));
++}
++
+ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
+ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
+ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
+--- a/arch/x86/kvm/svm/pmu.c
++++ b/arch/x86/kvm/svm/pmu.c
+@@ -256,6 +256,7 @@ static int amd_pmu_set_msr(struct kvm_vc
+       pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
+       if (pmc) {
+               pmc->counter += data - pmc_read_counter(pmc);
++              pmc_update_sample_period(pmc);
+               return 0;
+       }
+       /* MSR_EVNTSELn */
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -439,15 +439,11 @@ static int intel_pmu_set_msr(struct kvm_
+                           !(msr & MSR_PMC_FULL_WIDTH_BIT))
+                               data = (s64)(s32)data;
+                       pmc->counter += data - pmc_read_counter(pmc);
+-                      if (pmc->perf_event && !pmc->is_paused)
+-                              perf_event_period(pmc->perf_event,
+-                                                get_sample_period(pmc, data));
++                      pmc_update_sample_period(pmc);
+                       return 0;
+               } else if ((pmc = get_fixed_pmc(pmu, msr))) {
+                       pmc->counter += data - pmc_read_counter(pmc);
+-                      if (pmc->perf_event && !pmc->is_paused)
+-                              perf_event_period(pmc->perf_event,
+-                                                get_sample_period(pmc, data));
++                      pmc_update_sample_period(pmc);
+                       return 0;
+               } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
+                       if (data == pmc->eventsel)
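A hedged, userspace-only illustration of the period arithmetic behind pmc_update_sample_period(): it assumes the usual up-counting PMC behaviour (a width-bit counter written with value V overflows after 2^width - V increments); the helper below is written from that assumption and the changelog, not copied from pmu.h.

#include <stdint.h>
#include <stdio.h>

/* Remaining increments until an up-counter written with 'counter' overflows;
 * this is the sample period the backing perf_event must be re-armed with
 * after a guest write to the PMC (sketch, not kernel code). */
static uint64_t sample_period(uint64_t counter, uint64_t mask)
{
	uint64_t period = (-counter) & mask;
	return period ? period : mask + 1;   /* full wrap if written with 0 */
}

int main(void)
{
	uint64_t mask  = (1ULL << 48) - 1;          /* 48-bit AMD core PMC   */
	uint64_t stale = 0x800000000001ULL;         /* value from changelog  */
	uint64_t fresh = mask - 0x1000000 + 1;      /* e.g. fire in 2^24     */

	printf("period if stuck at the stale value: %#llx\n",
	       (unsigned long long)sample_period(stale, mask));
	printf("period after honoring the guest write: %#llx\n",
	       (unsigned long long)sample_period(fresh, mask));
	return 0;
}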
diff --git a/queue-5.15/series b/queue-5.15/series
index bfe7e291c347033e0aca5b00bb0138f303695f9f..fa0e7b5523241baeca2ebe48f065ab7be2570527 100644 (file)
--- a/queue-5.15/series
@@ -103,3 +103,8 @@ gpio-request-interrupts-after-irq-is-initialized.patch
 asoc-soc-dapm-fix-two-incorrect-uses-of-list-iterator.patch
 e1000e-fix-possible-overflow-in-ltr-decoding.patch
 arc-entry-fix-syscall_trace_exit-argument.patch
+arm_pmu-validate-single-group-leader-events.patch
+kvm-x86-pmu-update-amd-pmc-sample-period-to-fix-guest-nmi-watchdog.patch
+kvm-x86-pend-kvm_req_apicv_update-during-vcpu-creation-to-fix-a-race.patch
+kvm-nvmx-defer-apicv-updates-while-l2-is-active-until-l1-is-active.patch
+kvm-svm-flush-when-freeing-encrypted-pages-even-on-sme_coherent-cpus.patch