From: Greg Kroah-Hartman
Date: Mon, 15 May 2023 12:48:49 +0000 (+0200)
Subject: 5.10-stable patches
X-Git-Tag: v4.14.315~26
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c47f8b9d5d3c7cc956fa3329bcf828c3fce51eb4;p=thirdparty%2Fkernel%2Fstable-queue.git

5.10-stable patches

added patches:
	drbd-correctly-submit-flush-bio-on-barrier.patch
	kvm-fix-steal-time-asm-constraints.patch
	kvm-x86-do-not-report-preemption-if-the-steal-time-cache-is-stale.patch
	kvm-x86-do-not-set-st-preempted-when-going-back-to-user-space.patch
	kvm-x86-ensure-pv-tlb-flush-tracepoint-reflects-kvm-behavior.patch
	kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch
	kvm-x86-move-guest_pv_has-out-of-user_access-section.patch
	kvm-x86-remove-obsolete-disabling-of-page-faults-in-kvm_arch_vcpu_put.patch
	kvm-x86-revalidate-steal-time-cache-if-msr-value-changes.patch
	serial-8250-fix-serial8250_tx_empty-race-with-dma-tx.patch
---

diff --git a/queue-5.10/drbd-correctly-submit-flush-bio-on-barrier.patch b/queue-5.10/drbd-correctly-submit-flush-bio-on-barrier.patch
new file mode 100644
index 00000000000..92a5a275708
--- /dev/null
+++ b/queue-5.10/drbd-correctly-submit-flush-bio-on-barrier.patch
@@ -0,0 +1,49 @@
+From 3899d94e3831ee07ea6821c032dc297aec80586a Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christoph=20B=C3=B6hmwalder?=
+
+Date: Wed, 3 May 2023 14:19:37 +0200
+Subject: drbd: correctly submit flush bio on barrier
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christoph Böhmwalder
+
+commit 3899d94e3831ee07ea6821c032dc297aec80586a upstream.
+
+When we receive a flush command (or "barrier" in DRBD), we currently use
+a REQ_OP_FLUSH with the REQ_PREFLUSH flag set.
+
+The correct way to submit a flush bio is by using a REQ_OP_WRITE without
+any data, and setting the REQ_PREFLUSH flag.
+
+Since commit b4a6bb3a67aa ("block: add a sanity check for non-write
+flush/fua bios"), this triggers a warning in the block layer, but this
+has been broken for quite some time before that.
+
+So use the correct set of flags to actually make the flush happen.
+
+Cc: Christoph Hellwig
+Cc: stable@vger.kernel.org
+Fixes: f9ff0da56437 ("drbd: allow parallel flushes for multi-volume resources")
+Reported-by: Thomas Voegtle
+Signed-off-by: Christoph Böhmwalder
+Reviewed-by: Christoph Hellwig
+Link: https://lore.kernel.org/r/20230503121937.17232-1-christoph.boehmwalder@linbit.com
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/block/drbd/drbd_receiver.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/block/drbd/drbd_receiver.c
++++ b/drivers/block/drbd/drbd_receiver.c
+@@ -1299,7 +1299,7 @@ static void submit_one_flush(struct drbd
+ 	bio_set_dev(bio, device->ldev->backing_bdev);
+ 	bio->bi_private = octx;
+ 	bio->bi_end_io = one_flush_endio;
+-	bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
++	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+ 
+ 	device->flush_jif = jiffies;
+ 	set_bit(FLUSH_PENDING, &device->flags);
diff --git a/queue-5.10/kvm-fix-steal-time-asm-constraints.patch b/queue-5.10/kvm-fix-steal-time-asm-constraints.patch
new file mode 100644
index 00000000000..5871033b162
--- /dev/null
+++ b/queue-5.10/kvm-fix-steal-time-asm-constraints.patch
@@ -0,0 +1,50 @@
+From stable-owner@vger.kernel.org Wed May 10 20:16:36 2023
+From: Rishabh Bhatnagar
+Date: Wed, 10 May 2023 18:15:41 +0000
+Subject: KVM: Fix steal time asm constraints
+To: ,
+Cc: , , , , , , , , , , , David Woodhouse , kernel test robot , Rishabh Bhatnagar , Allen Pais
+Message-ID: <20230510181547.22451-4-risbhat@amazon.com>
+
+From: Rishabh Bhatnagar
+
+From: David Woodhouse
+
+commit 964b7aa0b040bdc6ec1c543ee620cda3f8b4c68a upstream.
+
+In 64-bit mode, x86 instruction encoding allows us to use the low 8 bits
+of any GPR as an 8-bit operand. In 32-bit mode, however, we can only use
+the [abcd] registers. For which, GCC has the "q" constraint instead of
+the less restrictive "r".
+
+Also fix st->preempted, which is an input/output operand rather than an
+input.
+
+Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
+Reported-by: kernel test robot
+Signed-off-by: David Woodhouse
+Message-Id: <89bf72db1b859990355f9c40713a34e0d2d86c98.camel@infradead.org>
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Rishabh Bhatnagar
+Tested-by: Allen Pais
+Acked-by: Sean Christopherson
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/x86.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3064,9 +3064,9 @@ static void record_steal_time(struct kvm
+ 		     "xor %1, %1\n"
+ 		     "2:\n"
+ 		     _ASM_EXTABLE_UA(1b, 2b)
+-		     : "+r" (st_preempted),
+-		       "+&r" (err)
+-		     : "m" (st->preempted));
++		     : "+q" (st_preempted),
++		       "+&r" (err),
++		       "+m" (st->preempted));
+ 	if (err)
+ 		goto out;
+ 
diff --git a/queue-5.10/kvm-x86-do-not-report-preemption-if-the-steal-time-cache-is-stale.patch b/queue-5.10/kvm-x86-do-not-report-preemption-if-the-steal-time-cache-is-stale.patch
new file mode 100644
index 00000000000..5c299e1c32c
--- /dev/null
+++ b/queue-5.10/kvm-x86-do-not-report-preemption-if-the-steal-time-cache-is-stale.patch
@@ -0,0 +1,53 @@
+From stable-owner@vger.kernel.org Wed May 10 20:16:28 2023
+From: Rishabh Bhatnagar
+Date: Wed, 10 May 2023 18:15:46 +0000
+Subject: KVM: x86: do not report preemption if the steal time cache is stale
+To: ,
+Cc: , , , , , , , , , , , David Woodhouse , Rishabh Bhatnagar , Allen Pais
+Message-ID: <20230510181547.22451-9-risbhat@amazon.com>
+
+From: Rishabh Bhatnagar
+
+From: Paolo Bonzini
+
+commit c3c28d24d910a746b02f496d190e0e8c6560224b upstream.
+
+Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time
+/ preempted status", 2021-11-11) open coded the previous call to
+kvm_map_gfn, but in doing so it dropped the comparison between the cached
+guest physical address and the one in the MSR. This causes an incorrect
+cache hit if the guest modifies the steal time address while the memslots
+remain the same. This can happen with kexec, in which case the preempted
+bit is written at the address used by the old kernel instead of
+the new one.
+
+Cc: David Woodhouse
+Cc: stable@vger.kernel.org
+Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Rishabh Bhatnagar
+Tested-by: Allen Pais
+Acked-by: Sean Christopherson
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/x86.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4096,6 +4096,7 @@ static void kvm_steal_time_set_preempted
+ 	struct kvm_steal_time __user *st;
+ 	struct kvm_memslots *slots;
+ 	static const u8 preempted = KVM_VCPU_PREEMPTED;
++	gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+ 
+ 	/*
+ 	 * The vCPU can be marked preempted if and only if the VM-Exit was on
+@@ -4123,6 +4124,7 @@ static void kvm_steal_time_set_preempted
+ 	slots = kvm_memslots(vcpu->kvm);
+ 
+ 	if (unlikely(slots->generation != ghc->generation ||
++		     gpa != ghc->gpa ||
+ 		     kvm_is_error_hva(ghc->hva) || !ghc->memslot))
+ 		return;
+ 
diff --git a/queue-5.10/kvm-x86-do-not-set-st-preempted-when-going-back-to-user-space.patch b/queue-5.10/kvm-x86-do-not-set-st-preempted-when-going-back-to-user-space.patch
new file mode 100644
index 00000000000..03e974a0eef
--- /dev/null
+++ b/queue-5.10/kvm-x86-do-not-set-st-preempted-when-going-back-to-user-space.patch
@@ -0,0 +1,61 @@
+From stable-owner@vger.kernel.org Wed May 10 20:16:31 2023
+From: Rishabh Bhatnagar
+Date: Wed, 10 May 2023 18:15:43 +0000
+Subject: KVM: x86: do not set st->preempted when going back to user space
+To: ,
+Cc: , , , , , , , , , , , Rishabh Bhatnagar , Allen Pais
+Message-ID: <20230510181547.22451-6-risbhat@amazon.com>
+
+From: Rishabh Bhatnagar
+
+From: Paolo Bonzini
+
+commit 54aa83c90198e68eee8b0850c749bc70efb548da upstream.
+
+Similar to the Xen path, only change the vCPU's reported state if the vCPU
+was actually preempted. The reason for KVM's behavior is that for example
+optimistic spinning might not be a good idea if the guest is doing repeated
+exits to userspace; however, it is confusing and unlikely to make a difference,
+because well-tuned guests will hardly ever exit KVM_RUN in the first place.
+
+Suggested-by: Sean Christopherson
+Signed-off-by: Paolo Bonzini
+[risbhat@amazon.com: Don't check for xen msr as support is not available
+and skip the SEV-ES condition]
+Signed-off-by: Rishabh Bhatnagar
+Tested-by: Allen Pais
+Acked-by: Sean Christopherson
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/x86.c | 18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4139,16 +4139,18 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *
+ {
+ 	int idx;
+ 
+-	if (vcpu->preempted)
++	if (vcpu->preempted) {
+ 		vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
+ 
+-	/*
+-	 * kvm_memslots() will be called by
+-	 * kvm_write_guest_offset_cached() so take the srcu lock.
+-	 */
+-	idx = srcu_read_lock(&vcpu->kvm->srcu);
+-	kvm_steal_time_set_preempted(vcpu);
+-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
++		/*
++		 * Take the srcu lock as memslots will be accessed to check the gfn
++		 * cache generation against the memslots generation.
++		 */
++		idx = srcu_read_lock(&vcpu->kvm->srcu);
++		kvm_steal_time_set_preempted(vcpu);
++		srcu_read_unlock(&vcpu->kvm->srcu, idx);
++	}
++
+ 	kvm_x86_ops.vcpu_put(vcpu);
+ 	vcpu->arch.last_host_tsc = rdtsc();
+ 	/*
diff --git a/queue-5.10/kvm-x86-ensure-pv-tlb-flush-tracepoint-reflects-kvm-behavior.patch b/queue-5.10/kvm-x86-ensure-pv-tlb-flush-tracepoint-reflects-kvm-behavior.patch
new file mode 100644
index 00000000000..1f2dc4b3105
--- /dev/null
+++ b/queue-5.10/kvm-x86-ensure-pv-tlb-flush-tracepoint-reflects-kvm-behavior.patch
@@ -0,0 +1,49 @@
+From stable-owner@vger.kernel.org Wed May 10 20:16:27 2023
+From: Rishabh Bhatnagar
+Date: Wed, 10 May 2023 18:15:39 +0000
+Subject: KVM: x86: Ensure PV TLB flush tracepoint reflects KVM behavior
+To: ,
+Cc: , , , , , , , , , , , Lai Jiangshan , Rishabh Bhatnagar , Allen Pais
+Message-ID: <20230510181547.22451-2-risbhat@amazon.com>
+
+From: Rishabh Bhatnagar
+
+From: Lai Jiangshan
+
+commit af3511ff7fa2107d6410831f3d71030f5e8d2b25 upstream.
+
+In record_steal_time(), st->preempted is read twice, and
+trace_kvm_pv_tlb_flush() might output an inconsistent result if
+kvm_vcpu_flush_tlb_guest() sees a different st->preempted later.
+
+It is a very trivial problem and hardly causes actual harm; it can be
+avoided by resetting and reading st->preempted in an atomic way via
+xchg().
+
+Signed-off-by: Lai Jiangshan
+
+Message-Id: <20210531174628.10265-1-jiangshanlai@gmail.com>
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Rishabh Bhatnagar
+Tested-by: Allen Pais
+Acked-by: Sean Christopherson
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/x86.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3041,9 +3041,11 @@ static void record_steal_time(struct kvm
+ 	 * expensive IPIs.
+ 	 */
+ 	if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
++		u8 st_preempted = xchg(&st->preempted, 0);
++
+ 		trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+-				       st->preempted & KVM_VCPU_FLUSH_TLB);
+-		if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
++				       st_preempted & KVM_VCPU_FLUSH_TLB);
++		if (st_preempted & KVM_VCPU_FLUSH_TLB)
+ 			kvm_vcpu_flush_tlb_guest(vcpu);
+ 	} else {
+ 		st->preempted = 0;
diff --git a/queue-5.10/kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch b/queue-5.10/kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch
new file mode 100644
index 00000000000..8ee74819696
--- /dev/null
+++ b/queue-5.10/kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch
@@ -0,0 +1,243 @@
+From stable-owner@vger.kernel.org Wed May 10 20:16:24 2023
+From: Rishabh Bhatnagar
+Date: Wed, 10 May 2023 18:15:40 +0000
+Subject: KVM: x86: Fix recording of guest steal time / preempted status
+To: ,
+Cc: , , , , , , , , , , , David Woodhouse , David Woodhouse , Rishabh Bhatnagar , Allen Pais
+Message-ID: <20230510181547.22451-3-risbhat@amazon.com>
+
+From: Rishabh Bhatnagar
+
+From: David Woodhouse
+
+commit 7e2175ebd695f17860c5bd4ad7616cce12ed4591 upstream.
+
+In commit b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is
+not missed") we switched to using a gfn_to_pfn_cache for accessing the
+guest steal time structure in order to allow for an atomic xchg of the
+preempted field. This has a couple of problems.
+
+Firstly, kvm_map_gfn() doesn't work at all for IOMEM pages when the
+atomic flag is set, which it is in kvm_steal_time_set_preempted(). So a
+guest vCPU using an IOMEM page for its steal time would never have its
+preempted field set.
+
+Secondly, the gfn_to_pfn_cache is not invalidated in all cases where it
+should have been. There are two stages to the GFN->PFN conversion;
+first the GFN is converted to a userspace HVA, and then that HVA is
+looked up in the process page tables to find the underlying host PFN.
+Correct invalidation of the latter would require being hooked up to the
+MMU notifiers, but that doesn't happen---so it just keeps mapping and
+unmapping the *wrong* PFN after the userspace page tables change.
+
+In the !IOMEM case at least the stale page *is* pinned all the time it's
+cached, so it won't be freed and reused by anyone else while still
+receiving the steal time updates. The map/unmap dance only takes care
+of the KVM administrivia such as marking the page dirty.
+
+Until the gfn_to_pfn cache handles the remapping automatically by
+integrating with the MMU notifiers, we might as well not get a
+kernel mapping of it, and use the perfectly serviceable userspace HVA
+that we already have. We just need to implement the atomic xchg on
+the userspace address with appropriate exception handling, which is
+fairly trivial.
+
+Cc: stable@vger.kernel.org
+Fixes: b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed")
+Signed-off-by: David Woodhouse
+Message-Id: <3645b9b889dac6438394194bb5586a46b68d581f.camel@infradead.org>
+[I didn't entirely agree with David's assessment of the
+ usefulness of the gfn_to_pfn cache, and integrated the outcome
+ of the discussion in the above commit message. - Paolo]
+Signed-off-by: Paolo Bonzini
+[risbhat@amazon.com: Use the older mark_page_dirty_in_slot api without
+kvm argument]
+Signed-off-by: Rishabh Bhatnagar
+Tested-by: Allen Pais
+Acked-by: Sean Christopherson
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/include/asm/kvm_host.h | 2 
+ arch/x86/kvm/x86.c | 105 ++++++++++++++++++++++++++++------------
+ 2 files changed, 76 insertions(+), 31 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -664,7 +664,7 @@ struct kvm_vcpu_arch {
+ 		u8 preempted;
+ 		u64 msr_val;
+ 		u64 last_steal;
+-		struct gfn_to_pfn_cache cache;
++		struct gfn_to_hva_cache cache;
+ 	} st;
+ 
+ 	u64 l1_tsc_offset;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3022,53 +3022,92 @@ static void kvm_vcpu_flush_tlb_guest(str
+ 
+ static void record_steal_time(struct kvm_vcpu *vcpu)
+ {
+-	struct kvm_host_map map;
+-	struct kvm_steal_time *st;
++	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
++	struct kvm_steal_time __user *st;
++	struct kvm_memslots *slots;
++	u64 steal;
++	u32 version;
+ 
+ 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ 		return;
+ 
+-	/* -EAGAIN is returned in atomic context so we can just return. */
+-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
+-			&map, &vcpu->arch.st.cache, false))
++	if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
+ 		return;
+ 
+-	st = map.hva +
+-		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++	slots = kvm_memslots(vcpu->kvm);
++
++	if (unlikely(slots->generation != ghc->generation ||
++		     kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
++		gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
++
++		/* We rely on the fact that it fits in a single page. */
++		BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
++
++		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
++		    kvm_is_error_hva(ghc->hva) || !ghc->memslot)
++			return;
++	}
++
++	st = (struct kvm_steal_time __user *)ghc->hva;
++	if (!user_access_begin(st, sizeof(*st)))
++		return;
+ 
+ 	/*
+ 	 * Doing a TLB flush here, on the guest's behalf, can avoid
+ 	 * expensive IPIs.
+ 	 */
+ 	if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+-		u8 st_preempted = xchg(&st->preempted, 0);
++		u8 st_preempted = 0;
++		int err = -EFAULT;
++
++		asm volatile("1: xchgb %0, %2\n"
++			     "xor %1, %1\n"
++			     "2:\n"
++			     _ASM_EXTABLE_UA(1b, 2b)
++			     : "+r" (st_preempted),
++			       "+&r" (err)
++			     : "m" (st->preempted));
++		if (err)
++			goto out;
++
++		user_access_end();
++
++		vcpu->arch.st.preempted = 0;
+ 
+ 		trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+ 				       st_preempted & KVM_VCPU_FLUSH_TLB);
+ 		if (st_preempted & KVM_VCPU_FLUSH_TLB)
+ 			kvm_vcpu_flush_tlb_guest(vcpu);
++
++		if (!user_access_begin(st, sizeof(*st)))
++			goto dirty;
+ 	} else {
+-		st->preempted = 0;
++		unsafe_put_user(0, &st->preempted, out);
++		vcpu->arch.st.preempted = 0;
+ 	}
+ 
+-	vcpu->arch.st.preempted = 0;
+-
+-	if (st->version & 1)
+-		st->version += 1; /* first time write, random junk */
++	unsafe_get_user(version, &st->version, out);
++	if (version & 1)
++		version += 1; /* first time write, random junk */
+ 
+-	st->version += 1;
++	version += 1;
++	unsafe_put_user(version, &st->version, out);
+ 
+ 	smp_wmb();
+ 
+-	st->steal += current->sched_info.run_delay -
++	unsafe_get_user(steal, &st->steal, out);
++	steal += current->sched_info.run_delay -
+ 		vcpu->arch.st.last_steal;
+ 	vcpu->arch.st.last_steal = current->sched_info.run_delay;
++	unsafe_put_user(steal, &st->steal, out);
+ 
+-	smp_wmb();
+-
+-	st->version += 1;
++	version += 1;
++	unsafe_put_user(version, &st->version, out);
+ 
+-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
++ out:
++	user_access_end();
++ dirty:
++	mark_page_dirty_in_slot(ghc->memslot, gpa_to_gfn(ghc->gpa));
+ }
+ 
+ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+@@ -4053,8 +4092,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu
+ 
+ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ {
+-	struct kvm_host_map map;
+-	struct kvm_steal_time *st;
++	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
++	struct kvm_steal_time __user *st;
++	struct kvm_memslots *slots;
++	static const u8 preempted = KVM_VCPU_PREEMPTED;
+ 
+ 	/*
+ 	 * The vCPU can be marked preempted if and only if the VM-Exit was on
+@@ -4075,16 +4116,23 @@ static void kvm_steal_time_set_preempted
+ 	if (vcpu->arch.st.preempted)
+ 		return;
+ 
+-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
+-			&vcpu->arch.st.cache, true))
++	/* This happens on process exit */
++	if (unlikely(current->mm != vcpu->kvm->mm))
+ 		return;
+ 
+-	st = map.hva +
+-		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
++	slots = kvm_memslots(vcpu->kvm);
++
++	if (unlikely(slots->generation != ghc->generation ||
++		     kvm_is_error_hva(ghc->hva) || !ghc->memslot))
++		return;
+ 
+-	st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
++	st = (struct kvm_steal_time __user *)ghc->hva;
++	BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
+ 
+-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
++	if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
++		vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
++
++	mark_page_dirty_in_slot(ghc->memslot, gpa_to_gfn(ghc->gpa));
+ }
+ 
+ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+@@ -10266,11 +10314,8 @@ void kvm_arch_vcpu_postcreate(struct kvm
+ 
+ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+ {
+-	struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
+ 	int idx;
+ 
+-	kvm_release_pfn(cache->pfn, cache->dirty, cache);
+-
+ 	kvmclock_reset(vcpu);
+ 
+ 	kvm_x86_ops.vcpu_free(vcpu);
diff --git a/queue-5.10/kvm-x86-move-guest_pv_has-out-of-user_access-section.patch b/queue-5.10/kvm-x86-move-guest_pv_has-out-of-user_access-section.patch
new file mode 100644
index 00000000000..464713c8902
--- /dev/null
+++ b/queue-5.10/kvm-x86-move-guest_pv_has-out-of-user_access-section.patch
@@ -0,0 +1,63 @@
+From stable-owner@vger.kernel.org Wed May 10 20:16:44 2023
+From: Rishabh Bhatnagar
+Date: Wed, 10 May 2023 18:15:47 +0000
+Subject: KVM: x86: move guest_pv_has out of user_access section
+To: ,
+Cc: , , , , , , , , , , , Stephen Rothwell , David Woodhouse , "Rishabh Bhatnagar" , Allen Pais
+Message-ID: <20230510181547.22451-10-risbhat@amazon.com>
+
+From: Rishabh Bhatnagar
+
+From: Paolo Bonzini
+
+commit 3e067fd8503d6205aa0c1c8f48f6b209c592d19c upstream.
+
+When UBSAN is enabled, the code emitted for the call to guest_pv_has
+includes a call to __ubsan_handle_load_invalid_value. objtool
+complains that this call happens with UACCESS enabled; to avoid
+the warning, pull the calls to user_access_begin into both arms
+of the "if" statement, after the check for guest_pv_has.
+
+Reported-by: Stephen Rothwell
+Cc: David Woodhouse
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Rishabh Bhatnagar
+Tested-by: Allen Pais
+Acked-by: Sean Christopherson
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/x86.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3049,9 +3049,6 @@ static void record_steal_time(struct kvm
+ 	}
+ 
+ 	st = (struct kvm_steal_time __user *)ghc->hva;
+-	if (!user_access_begin(st, sizeof(*st)))
+-		return;
+-
+ 	/*
+ 	 * Doing a TLB flush here, on the guest's behalf, can avoid
+ 	 * expensive IPIs.
+@@ -3060,6 +3057,9 @@ static void record_steal_time(struct kvm
+ 		u8 st_preempted = 0;
+ 		int err = -EFAULT;
+ 
++		if (!user_access_begin(st, sizeof(*st)))
++			return;
++
+ 		asm volatile("1: xchgb %0, %2\n"
+ 			     "xor %1, %1\n"
+ 			     "2:\n"
+@@ -3082,6 +3082,9 @@ static void record_steal_time(struct kvm
+ 		if (!user_access_begin(st, sizeof(*st)))
+ 			goto dirty;
+ 	} else {
++		if (!user_access_begin(st, sizeof(*st)))
++			return;
++
+ 		unsafe_put_user(0, &st->preempted, out);
+ 		vcpu->arch.st.preempted = 0;
+ 	}
diff --git a/queue-5.10/kvm-x86-remove-obsolete-disabling-of-page-faults-in-kvm_arch_vcpu_put.patch b/queue-5.10/kvm-x86-remove-obsolete-disabling-of-page-faults-in-kvm_arch_vcpu_put.patch
new file mode 100644
index 00000000000..46b0c0e29f3
--- /dev/null
+++ b/queue-5.10/kvm-x86-remove-obsolete-disabling-of-page-faults-in-kvm_arch_vcpu_put.patch
@@ -0,0 +1,57 @@
+From stable-owner@vger.kernel.org Wed May 10 20:16:27 2023
+From: Rishabh Bhatnagar
+Date: Wed, 10 May 2023 18:15:42 +0000
+Subject: KVM: x86: Remove obsolete disabling of page faults in kvm_arch_vcpu_put()
+To: ,
+Cc: , , , , , , , , , , , Rishabh Bhatnagar , Allen Pais
+Message-ID: <20230510181547.22451-5-risbhat@amazon.com>
+
+From: Rishabh Bhatnagar
+
+From: Sean Christopherson
+
+commit 19979fba9bfaeab427a8e106d915f0627c952828 upstream.
+
+Remove the disabling of page faults across kvm_steal_time_set_preempted()
+as KVM now accesses the steal time struct (shared with the guest) via a
+cached mapping (see commit b043138246a4, "x86/KVM: Make sure
+KVM_VCPU_FLUSH_TLB flag is not missed".) The cache lookup is flagged as
+atomic, thus it would be a bug if KVM tried to resolve a new pfn, i.e.
+we want the splat that would be reached via might_fault().
+
+Signed-off-by: Sean Christopherson
+Message-Id: <20210123000334.3123628-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Rishabh Bhatnagar
+Tested-by: Allen Pais
+Acked-by: Sean Christopherson
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/x86.c | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4143,22 +4143,12 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *
+ 		vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
+ 
+ 	/*
+-	 * Disable page faults because we're in atomic context here.
+-	 * kvm_write_guest_offset_cached() would call might_fault()
+-	 * that relies on pagefault_disable() to tell if there's a
+-	 * bug. NOTE: the write to guest memory may not go through if
+-	 * during postcopy live migration or if there's heavy guest
+-	 * paging.
+-	 */
+-	pagefault_disable();
+-	/*
+ 	 * kvm_memslots() will be called by
+ 	 * kvm_write_guest_offset_cached() so take the srcu lock.
+ 	 */
+ 	idx = srcu_read_lock(&vcpu->kvm->srcu);
+ 	kvm_steal_time_set_preempted(vcpu);
+ 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+-	pagefault_enable();
+ 	kvm_x86_ops.vcpu_put(vcpu);
+ 	vcpu->arch.last_host_tsc = rdtsc();
+ 	/*
diff --git a/queue-5.10/kvm-x86-revalidate-steal-time-cache-if-msr-value-changes.patch b/queue-5.10/kvm-x86-revalidate-steal-time-cache-if-msr-value-changes.patch
new file mode 100644
index 00000000000..654aacbe5a2
--- /dev/null
+++ b/queue-5.10/kvm-x86-revalidate-steal-time-cache-if-msr-value-changes.patch
@@ -0,0 +1,67 @@
+From stable-owner@vger.kernel.org Wed May 10 20:16:44 2023
+From: Rishabh Bhatnagar
+Date: Wed, 10 May 2023 18:15:45 +0000
+Subject: KVM: x86: revalidate steal time cache if MSR value changes
+To: ,
+Cc: , , , , , , , , , , , Dave Young , Xiaoying Yan , "Dr. David Alan Gilbert" , David Woodhouse , Rishabh Bhatnagar , Allen Pais
+Message-ID: <20230510181547.22451-8-risbhat@amazon.com>
+
+From: Rishabh Bhatnagar
+
+From: Paolo Bonzini
+
+commit 901d3765fa804ce42812f1d5b1f3de2dfbb26723 upstream.
+
+Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time
+/ preempted status", 2021-11-11) open coded the previous call to
+kvm_map_gfn, but in doing so it dropped the comparison between the cached
+guest physical address and the one in the MSR. This causes an incorrect
+cache hit if the guest modifies the steal time address while the memslots
+remain the same. This can happen with kexec, in which case the steal
+time data is written at the address used by the old kernel instead of
+the new one.
+
+While at it, rename the variable from gfn to gpa since it is a plain
+physical address and not a right-shifted one.
+
+Reported-by: Dave Young
+Reported-by: Xiaoying Yan
+Analyzed-by: Dr. David Alan Gilbert
+Cc: David Woodhouse
+Cc: stable@vger.kernel.org
+Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Rishabh Bhatnagar
+Tested-by: Allen Pais
+Acked-by: Sean Christopherson
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/x86.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3025,6 +3025,7 @@ static void record_steal_time(struct kvm
+ 	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+ 	struct kvm_steal_time __user *st;
+ 	struct kvm_memslots *slots;
++	gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+ 	u64 steal;
+ 	u32 version;
+ 
+@@ -3037,13 +3038,12 @@ static void record_steal_time(struct kvm
+ 	slots = kvm_memslots(vcpu->kvm);
+ 
+ 	if (unlikely(slots->generation != ghc->generation ||
++		     gpa != ghc->gpa ||
+ 		     kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
+-		gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+-
+ 		/* We rely on the fact that it fits in a single page. */
+ 		BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
+ 
+-		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
++		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
+ 		    kvm_is_error_hva(ghc->hva) || !ghc->memslot)
+ 			return;
+ 	}
diff --git a/queue-5.10/serial-8250-fix-serial8250_tx_empty-race-with-dma-tx.patch b/queue-5.10/serial-8250-fix-serial8250_tx_empty-race-with-dma-tx.patch
new file mode 100644
index 00000000000..abd2ec7a8b3
--- /dev/null
+++ b/queue-5.10/serial-8250-fix-serial8250_tx_empty-race-with-dma-tx.patch
@@ -0,0 +1,98 @@
+From 146a37e05d620cef4ad430e5d1c9c077fe6fa76f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?=
+Date: Fri, 17 Mar 2023 13:33:18 +0200
+Subject: serial: 8250: Fix serial8250_tx_empty() race with DMA Tx
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilpo Järvinen
+
+commit 146a37e05d620cef4ad430e5d1c9c077fe6fa76f upstream.
+
+There's a potential race before THRE/TEMT deasserts when DMA Tx is
+starting up (or the next batch of continuous Tx is being submitted).
+This can lead to misdetecting the Tx empty condition.
+
+It is entirely normal for THRE/TEMT to be set for some time after the
+DMA Tx has been set up in serial8250_tx_dma(). As the Tx side is
+definitely not empty at that point, it seems incorrect for
+serial8250_tx_empty() to claim Tx is empty.
+
+Fix the race by also checking in serial8250_tx_empty() whether there's
+DMA Tx active.
+
+Note: This fix only addresses the in-kernel race, mainly to make using
+TCSADRAIN/FLUSH robust. Userspace can still cause other races, but they
+seem to be userspace concurrency control problems.
+
+Fixes: 9ee4b83e51f74 ("serial: 8250: Add support for dmaengine")
+Cc: stable@vger.kernel.org
+Signed-off-by: Ilpo Järvinen
+Link: https://lore.kernel.org/r/20230317113318.31327-3-ilpo.jarvinen@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/tty/serial/8250/8250.h | 12 ++++++++++++
+ drivers/tty/serial/8250/8250_port.c | 12 +++++++++---
+ 2 files changed, 21 insertions(+), 3 deletions(-)
+
+--- a/drivers/tty/serial/8250/8250.h
++++ b/drivers/tty/serial/8250/8250.h
+@@ -330,6 +330,13 @@ extern int serial8250_rx_dma(struct uart
+ extern void serial8250_rx_dma_flush(struct uart_8250_port *);
+ extern int serial8250_request_dma(struct uart_8250_port *);
+ extern void serial8250_release_dma(struct uart_8250_port *);
++
++static inline bool serial8250_tx_dma_running(struct uart_8250_port *p)
++{
++	struct uart_8250_dma *dma = p->dma;
++
++	return dma && dma->tx_running;
++}
+ #else
+ static inline int serial8250_tx_dma(struct uart_8250_port *p)
+ {
+@@ -345,6 +352,11 @@ static inline int serial8250_request_dma
+ 	return -1;
+ }
+ static inline void serial8250_release_dma(struct uart_8250_port *p) { }
++
++static inline bool serial8250_tx_dma_running(struct uart_8250_port *p)
++{
++	return false;
++}
+ #endif
+ 
+ static inline int ns16550a_goto_highspeed(struct uart_8250_port *up)
+--- a/drivers/tty/serial/8250/8250_port.c
++++ b/drivers/tty/serial/8250/8250_port.c
+@@ -1971,19 +1971,25 @@ static int serial8250_tx_threshold_handl
+ static unsigned int serial8250_tx_empty(struct uart_port *port)
+ {
+ 	struct uart_8250_port *up = up_to_u8250p(port);
++	unsigned int result = 0;
+ 	unsigned long flags;
+ 	unsigned int lsr;
+ 
+ 	serial8250_rpm_get(up);
+ 
+ 	spin_lock_irqsave(&port->lock, flags);
+-	lsr = serial_port_in(port, UART_LSR);
+-	up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
++	if (!serial8250_tx_dma_running(up)) {
++		lsr = serial_port_in(port, UART_LSR);
++		up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
++
++		if ((lsr & BOTH_EMPTY) == BOTH_EMPTY)
++			result = TIOCSER_TEMT;
++	}
+ 	spin_unlock_irqrestore(&port->lock, flags);
+ 
+ 	serial8250_rpm_put(up);
+ 
+-	return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? TIOCSER_TEMT : 0;
++	return result;
+ }
+ 
+ unsigned int serial8250_do_get_mctrl(struct uart_port *port)
diff --git a/queue-5.10/series b/queue-5.10/series
index 25ab15a14fe..5b0be21684a 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -365,3 +365,13 @@ ext4-add-bounds-checking-in-get_max_inline_xattr_value_size.patch
 ext4-bail-out-of-ext4_xattr_ibody_get-fails-for-any-reason.patch
 ext4-remove-a-bug_on-in-ext4_mb_release_group_pa.patch
 ext4-fix-invalid-free-tracking-in-ext4_xattr_move_to_block.patch
+serial-8250-fix-serial8250_tx_empty-race-with-dma-tx.patch
+drbd-correctly-submit-flush-bio-on-barrier.patch
+kvm-x86-ensure-pv-tlb-flush-tracepoint-reflects-kvm-behavior.patch
+kvm-x86-fix-recording-of-guest-steal-time-preempted-status.patch
+kvm-fix-steal-time-asm-constraints.patch
+kvm-x86-remove-obsolete-disabling-of-page-faults-in-kvm_arch_vcpu_put.patch
+kvm-x86-do-not-set-st-preempted-when-going-back-to-user-space.patch
+kvm-x86-revalidate-steal-time-cache-if-msr-value-changes.patch
+kvm-x86-do-not-report-preemption-if-the-steal-time-cache-is-stale.patch
+kvm-x86-move-guest_pv_has-out-of-user_access-section.patch