From: Greg Kroah-Hartman
Date: Sat, 28 Nov 2020 12:51:18 +0000 (+0100)
Subject: 4.9-stable patches
X-Git-Tag: v4.4.247~33
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=fd43676dd340addb55a1487fea09d17693acc265;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
    arm64-pgtable-fix-pte_accessible.patch
    kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
---

diff --git a/queue-4.9/arm64-pgtable-fix-pte_accessible.patch b/queue-4.9/arm64-pgtable-fix-pte_accessible.patch
new file mode 100644
index 00000000000..94c4f56c9ac
--- /dev/null
+++ b/queue-4.9/arm64-pgtable-fix-pte_accessible.patch
@@ -0,0 +1,59 @@
+From 07509e10dcc77627f8b6a57381e878fe269958d3 Mon Sep 17 00:00:00 2001
+From: Will Deacon
+Date: Fri, 20 Nov 2020 13:28:01 +0000
+Subject: arm64: pgtable: Fix pte_accessible()
+
+From: Will Deacon
+
+commit 07509e10dcc77627f8b6a57381e878fe269958d3 upstream.
+
+pte_accessible() is used by ptep_clear_flush() to figure out whether TLB
+invalidation is necessary when unmapping pages for reclaim. Although our
+implementation is correct according to the architecture, returning true
+only for valid, young ptes in the absence of racing page-table
+modifications, this is in fact flawed due to lazy invalidation of old
+ptes in ptep_clear_flush_young() where we elide the expensive DSB
+instruction for completing the TLB invalidation.
+
+Rather than penalise the aging path, adjust pte_accessible() to return
+true for any valid pte, even if the access flag is cleared.
+
+Cc:
+Fixes: 76c714be0e5e ("arm64: pgtable: implement pte_accessible()")
+Reported-by: Yu Zhao
+Acked-by: Yu Zhao
+Reviewed-by: Minchan Kim
+Reviewed-by: Catalin Marinas
+Link: https://lore.kernel.org/r/20201120143557.6715-2-will@kernel.org
+Signed-off-by: Will Deacon
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arm64/include/asm/pgtable.h | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -85,8 +85,6 @@ extern unsigned long empty_zero_page[PAG
+ #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
+ #define pte_valid_not_user(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+-#define pte_valid_young(pte) \
+- ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
+ #define pte_valid_user(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
+
+@@ -94,9 +92,12 @@ extern unsigned long empty_zero_page[PAG
+ * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
+ * so that we don't erroneously return false for pages that have been
+ * remapped as PROT_NONE but are yet to be flushed from the TLB.
++ * Note that we can't make any assumptions based on the state of the access
++ * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
++ * TLB.
+ */
+ #define pte_accessible(mm, pte) \
+- (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
++ (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
+
+ /*
+  * p??_access_permitted() is true for valid user mappings (subject to the
diff --git a/queue-4.9/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch b/queue-4.9/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
new file mode 100644
index 00000000000..b0174a45389
--- /dev/null
+++ b/queue-4.9/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
@@ -0,0 +1,139 @@
+From 71cc849b7093bb83af966c0e60cb11b7f35cd746 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini
+Date: Fri, 27 Nov 2020 09:18:20 +0100
+Subject: KVM: x86: Fix split-irqchip vs interrupt injection window request
+
+From: Paolo Bonzini
+
+commit 71cc849b7093bb83af966c0e60cb11b7f35cd746 upstream.
+
+kvm_cpu_accept_dm_intr and kvm_vcpu_ready_for_interrupt_injection are
+a hodge-podge of conditions, hacked together to get something that
+more or less works. But what is actually needed is much simpler;
+in both cases the fundamental question is, do we have a place to stash
+an interrupt if userspace does KVM_INTERRUPT?
+
+In userspace irqchip mode, that is !vcpu->arch.interrupt.injected.
+Currently kvm_event_needs_reinjection(vcpu) covers it, but it is
+unnecessarily restrictive.
+
+In split irqchip mode it's a bit more complicated, we need to check
+kvm_apic_accept_pic_intr(vcpu) (the IRQ window exit is basically an INTACK
+cycle and thus requires ExtINTs not to be masked) as well as
+!pending_userspace_extint(vcpu). However, there is no need to
+check kvm_event_needs_reinjection(vcpu), since split irqchip keeps
+pending ExtINT state separate from event injection state, and checking
+kvm_cpu_has_interrupt(vcpu) is wrong too since ExtINT has higher
+priority than APIC interrupts. In fact the latter fixes a bug:
+when userspace requests an IRQ window vmexit, an interrupt in the
+local APIC can cause kvm_cpu_has_interrupt() to be true and thus
+kvm_vcpu_ready_for_interrupt_injection() to return false. When this
+happens, vcpu_run does not exit to userspace but the interrupt window
+vmexits keep occurring. The VM loops without any hope of making progress.
+
+Once we try to fix these with something like
+
+    return kvm_arch_interrupt_allowed(vcpu) &&
+-        !kvm_cpu_has_interrupt(vcpu) &&
+-        !kvm_event_needs_reinjection(vcpu) &&
+-        kvm_cpu_accept_dm_intr(vcpu);
++        (!lapic_in_kernel(vcpu)
++            ? !vcpu->arch.interrupt.injected
++            : (kvm_apic_accept_pic_intr(vcpu)
++               && !pending_userspace_extint(v)));
+
+we realize two things. First, thanks to the previous patch the complex
+conditional can reuse !kvm_cpu_has_extint(vcpu). Second, the interrupt
+window request in vcpu_enter_guest()
+
+    bool req_int_win =
+        dm_request_for_irq_injection(vcpu) &&
+        kvm_cpu_accept_dm_intr(vcpu);
+
+should be kept in sync with kvm_vcpu_ready_for_interrupt_injection():
+it is unnecessary to ask the processor for an interrupt window
+if we would not be able to return to userspace. Therefore,
+kvm_cpu_accept_dm_intr(vcpu) is basically !kvm_cpu_has_extint(vcpu)
+ANDed with the existing check for masked ExtINT. It all makes sense:
+
+- we can accept an interrupt from userspace if there is a place
+  to stash it (and, for irqchip split, ExtINTs are not masked).
+  Interrupts from userspace _can_ be accepted even if right now
+  EFLAGS.IF=0.
+
+- in order to tell userspace we will inject its interrupt ("IRQ
+  window open" i.e. kvm_vcpu_ready_for_interrupt_injection), both
+  KVM and the vCPU need to be ready to accept the interrupt.
+
+... and this is what the patch implements.
+
+Reported-by: David Woodhouse
+Analyzed-by: David Woodhouse
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini
+Reviewed-by: Nikos Tsironis
+Reviewed-by: David Woodhouse
+Tested-by: David Woodhouse
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/include/asm/kvm_host.h | 1 +
+ arch/x86/kvm/irq.c | 2 +-
+ arch/x86/kvm/x86.c | 18 ++++++++++--------
+ 3 files changed, 12 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1350,6 +1350,7 @@ int kvm_test_age_hva(struct kvm *kvm, un
+ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
+ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
++int kvm_cpu_has_extint(struct kvm_vcpu *v);
+ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
+--- a/arch/x86/kvm/irq.c
++++ b/arch/x86/kvm/irq.c
+@@ -52,7 +52,7 @@ static int pending_userspace_extint(stru
+ * check if there is pending interrupt from
+ * non-APIC source without intack.
+ */
+-static int kvm_cpu_has_extint(struct kvm_vcpu *v)
++int kvm_cpu_has_extint(struct kvm_vcpu *v)
+ {
+ u8 accept = kvm_apic_accept_pic_intr(v);
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3053,21 +3053,23 @@ static int kvm_vcpu_ioctl_set_lapic(stru
+
+ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
+ {
++ /*
++ * We can accept userspace's request for interrupt injection
++ * as long as we have a place to store the interrupt number.
++ * The actual injection will happen when the CPU is able to
++ * deliver the interrupt.
++ */
++ if (kvm_cpu_has_extint(vcpu))
++ return false;
++
++ /* Acknowledging ExtINT does not happen if LINT0 is masked. */
+ return (!lapic_in_kernel(vcpu) ||
+ kvm_apic_accept_pic_intr(vcpu));
+ }
+
+-/*
+- * if userspace requested an interrupt window, check that the
+- * interrupt window is open.
+- *
+- * No need to exit to userspace if we already have an interrupt queued.
+- */
+ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
+ {
+ return kvm_arch_interrupt_allowed(vcpu) &&
+- !kvm_cpu_has_interrupt(vcpu) &&
+- !kvm_event_needs_reinjection(vcpu) &&
+ kvm_cpu_accept_dm_intr(vcpu);
+ }
+
diff --git a/queue-4.9/series b/queue-4.9/series
index 0b9d61b04b3..14ec1fef273 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -4,3 +4,5 @@ btrfs-fix-lockdep-splat-when-reading-qgroup-config-on-mount.patch
 pci-add-device-even-if-driver-attach-failed.patch
 btrfs-tree-checker-enhance-chunk-checker-to-validate-chunk-profile.patch
 btrfs-inode-verify-inode-mode-to-avoid-null-pointer-dereference.patch
+kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
+arm64-pgtable-fix-pte_accessible.patch
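
The arm64 change above amounts to dropping the access-flag test from pte_accessible() when no TLB flush is pending. Below is a minimal, stand-alone userspace C sketch of the predicate before and after the patch; the bit values, helper names and the boolean flush-pending parameter are simplified stand-ins for illustration, not the kernel's definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative stand-ins for the arm64 PTE bits; the values are only for
 * this sketch, not the kernel's definitions.
 */
#define PTE_VALID	(1ULL << 0)
#define PTE_AF		(1ULL << 10)	/* hardware access ("young") flag */

typedef uint64_t pte_t;

static bool pte_valid_bit(pte_t pte)	{ return pte & PTE_VALID; }
static bool pte_young(pte_t pte)	{ return pte & PTE_AF; }

/* Before the patch: only valid *and* young ptes counted as TLB-cachable. */
static bool pte_accessible_old(bool tlb_flush_pending, pte_t pte)
{
	return tlb_flush_pending ? pte_valid_bit(pte)	/* pte_present() */
				 : (pte_valid_bit(pte) && pte_young(pte));
}

/*
 * After the patch: any valid pte may still be cached in the TLB, because
 * ptep_clear_flush_young() clears the access flag lazily (no DSB), so
 * "old" does not imply "not in the TLB".  (In the kernel the flush-pending
 * arm uses pte_present(), which also covers PROT_NONE ptes; this model
 * does not distinguish them.)
 */
static bool pte_accessible_new(bool tlb_flush_pending, pte_t pte)
{
	(void)tlb_flush_pending;
	return pte_valid_bit(pte);
}

int main(void)
{
	pte_t valid_but_old = PTE_VALID;	/* AF already cleared, DSB elided */

	printf("old: %d -> reclaim would skip the TLB invalidation\n",
	       pte_accessible_old(false, valid_but_old));
	printf("new: %d -> reclaim invalidates the TLB entry\n",
	       pte_accessible_new(false, valid_but_old));
	return 0;
}

Because ptep_clear_flush_young() clears the access flag without the DSB that would complete the TLB invalidation, a valid-but-old pte can still have a live TLB entry; the old predicate let ptep_clear_flush() skip the invalidation for exactly those entries.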
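
The KVM change reduces both helpers to one question: is there somewhere to stash the interrupt userspace wants to inject? The sketch below is a rough, stand-alone C model of the logic described in the commit message, not the real KVM code; the struct fields and function names are illustrative stand-ins for lapic_in_kernel(), kvm_apic_accept_pic_intr(), pending_userspace_extint() and vcpu->arch.interrupt.injected.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical vCPU state: just the facts the commit message reasons about. */
struct vcpu_model {
	bool lapic_in_kernel;		/* split irqchip: LAPIC in kernel, PIC/IOAPIC in userspace */
	bool interrupt_injected;	/* userspace irqchip: injection slot already occupied */
	bool pending_userspace_extint;	/* split irqchip: an ExtINT is already stashed */
	bool apic_accepts_pic_intr;	/* LINT0 not masked, so an INTACK cycle can be delivered */
	bool interrupts_allowed;	/* EFLAGS.IF set and no interrupt shadow */
};

/* Is the one slot for a userspace-provided interrupt already taken? */
static bool cpu_has_extint(const struct vcpu_model *v)
{
	if (!v->lapic_in_kernel)
		return v->interrupt_injected;
	return v->apic_accepts_pic_intr && v->pending_userspace_extint;
}

/*
 * Mirrors the reworked kvm_cpu_accept_dm_intr(): accept KVM_INTERRUPT from
 * userspace as long as there is a place to stash it and, for split irqchip,
 * ExtINTs are not masked.  EFLAGS.IF is deliberately not consulted here.
 */
static bool cpu_accept_dm_intr(const struct vcpu_model *v)
{
	if (cpu_has_extint(v))
		return false;
	return !v->lapic_in_kernel || v->apic_accepts_pic_intr;
}

/*
 * Mirrors the reworked kvm_vcpu_ready_for_interrupt_injection(): the "IRQ
 * window" is open only when the vCPU can take an interrupt *and* KVM can
 * stash the one userspace wants to inject.
 */
static bool vcpu_ready_for_interrupt_injection(const struct vcpu_model *v)
{
	return v->interrupts_allowed && cpu_accept_dm_intr(v);
}

int main(void)
{
	struct vcpu_model v = {
		.lapic_in_kernel = true,	/* split irqchip */
		.apic_accepts_pic_intr = true,
		.interrupts_allowed = true,
	};

	printf("empty slot:  accept=%d ready=%d\n",
	       cpu_accept_dm_intr(&v), vcpu_ready_for_interrupt_injection(&v));

	v.pending_userspace_extint = true;	/* userspace already injected one */
	printf("slot in use: accept=%d ready=%d\n",
	       cpu_accept_dm_intr(&v), vcpu_ready_for_interrupt_injection(&v));
	return 0;
}

Note that a pending LAPIC interrupt plays no part in these checks; only an already-stashed ExtINT (or a masked LINT0) closes the window, which is what avoids the interrupt-window vmexit loop described in the commit message.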