From: Greg Kroah-Hartman Date: Sat, 28 Nov 2020 12:51:35 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.4.247~32 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=670b2c94f812d1b4082f5aaa884b9aa1f7b9aa32;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch arm64-pgtable-fix-pte_accessible.patch kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch --- diff --git a/queue-4.14/arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch b/queue-4.14/arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch new file mode 100644 index 00000000000..5a4a12dd291 --- /dev/null +++ b/queue-4.14/arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch @@ -0,0 +1,77 @@ +From ff1712f953e27f0b0718762ec17d0adb15c9fd0b Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Fri, 20 Nov 2020 13:57:48 +0000 +Subject: arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect() + +From: Will Deacon + +commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b upstream. + +With hardware dirty bit management, calling pte_wrprotect() on a writable, +dirty PTE will lose the dirty state and return a read-only, clean entry. + +Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that +the dirty bit is preserved for writable entries, as this is required for +soft-dirty bit management if we enable it in the future. 
+ +Cc: <stable@vger.kernel.org> +Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") +Reviewed-by: Catalin Marinas +Link: https://lore.kernel.org/r/20201120143557.6715-3-will@kernel.org +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/pgtable.h | 27 ++++++++++++++------------- + 1 file changed, 14 insertions(+), 13 deletions(-) + +--- a/arch/arm64/include/asm/pgtable.h ++++ b/arch/arm64/include/asm/pgtable.h +@@ -130,13 +130,6 @@ static inline pte_t set_pte_bit(pte_t pt + return pte; + } + +-static inline pte_t pte_wrprotect(pte_t pte) +-{ +- pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); +- pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); +- return pte; +-} +- + static inline pte_t pte_mkwrite(pte_t pte) + { + pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); +@@ -162,6 +155,20 @@ static inline pte_t pte_mkdirty(pte_t pt + return pte; + } + ++static inline pte_t pte_wrprotect(pte_t pte) ++{ ++ /* ++ * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY ++ * clear), set the PTE_DIRTY bit. ++ */ ++ if (pte_hw_dirty(pte)) ++ pte = pte_mkdirty(pte); ++ ++ pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); ++ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); ++ return pte; ++} ++ + static inline pte_t pte_mkold(pte_t pte) + { + return clear_pte_bit(pte, __pgprot(PTE_AF)); +@@ -643,12 +650,6 @@ static inline void ptep_set_wrprotect(st + pte = READ_ONCE(*ptep); + do { + old_pte = pte; +- /* +- * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY +- * clear), set the PTE_DIRTY bit. 
+- */ +- if (pte_hw_dirty(pte)) +- pte = pte_mkdirty(pte); + pte = pte_wrprotect(pte); + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), + pte_val(old_pte), pte_val(pte)); diff --git a/queue-4.14/arm64-pgtable-fix-pte_accessible.patch b/queue-4.14/arm64-pgtable-fix-pte_accessible.patch new file mode 100644 index 00000000000..e36ac1b7705 --- /dev/null +++ b/queue-4.14/arm64-pgtable-fix-pte_accessible.patch @@ -0,0 +1,59 @@ +From 07509e10dcc77627f8b6a57381e878fe269958d3 Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Fri, 20 Nov 2020 13:28:01 +0000 +Subject: arm64: pgtable: Fix pte_accessible() + +From: Will Deacon + +commit 07509e10dcc77627f8b6a57381e878fe269958d3 upstream. + +pte_accessible() is used by ptep_clear_flush() to figure out whether TLB +invalidation is necessary when unmapping pages for reclaim. Although our +implementation is correct according to the architecture, returning true +only for valid, young ptes in the absence of racing page-table +modifications, this is in fact flawed due to lazy invalidation of old +ptes in ptep_clear_flush_young() where we elide the expensive DSB +instruction for completing the TLB invalidation. + +Rather than penalise the aging path, adjust pte_accessible() to return +true for any valid pte, even if the access flag is cleared. 
+ +Cc: <stable@vger.kernel.org> +Fixes: 76c714be0e5e ("arm64: pgtable: implement pte_accessible()") +Reported-by: Yu Zhao +Acked-by: Yu Zhao +Reviewed-by: Minchan Kim +Reviewed-by: Catalin Marinas +Link: https://lore.kernel.org/r/20201120143557.6715-2-will@kernel.org +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/pgtable.h | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/arch/arm64/include/asm/pgtable.h ++++ b/arch/arm64/include/asm/pgtable.h +@@ -92,8 +92,6 @@ extern unsigned long empty_zero_page[PAG + #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) + #define pte_valid_not_user(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) +-#define pte_valid_young(pte) \ +- ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) + #define pte_valid_user(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) + +@@ -101,9 +99,12 @@ extern unsigned long empty_zero_page[PAG + * Could the pte be present in the TLB? We must check mm_tlb_flush_pending + * so that we don't erroneously return false for pages that have been + * remapped as PROT_NONE but are yet to be flushed from the TLB. ++ * Note that we can't make any assumptions based on the state of the access ++ * flag, since ptep_clear_flush_young() elides a DSB when invalidating the ++ * TLB. + */ + #define pte_accessible(mm, pte) \ +- (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) ++ (mm_tlb_flush_pending(mm) ? 
pte_present(pte) : pte_valid(pte)) + + /* + * p??_access_permitted() is true for valid user mappings (subject to the diff --git a/queue-4.14/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch b/queue-4.14/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch new file mode 100644 index 00000000000..2dd154ffa30 --- /dev/null +++ b/queue-4.14/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch @@ -0,0 +1,139 @@ +From 71cc849b7093bb83af966c0e60cb11b7f35cd746 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 27 Nov 2020 09:18:20 +0100 +Subject: KVM: x86: Fix split-irqchip vs interrupt injection window request + +From: Paolo Bonzini + +commit 71cc849b7093bb83af966c0e60cb11b7f35cd746 upstream. + +kvm_cpu_accept_dm_intr and kvm_vcpu_ready_for_interrupt_injection are +a hodge-podge of conditions, hacked together to get something that +more or less works. But what is actually needed is much simpler; +in both cases the fundamental question is, do we have a place to stash +an interrupt if userspace does KVM_INTERRUPT? + +In userspace irqchip mode, that is !vcpu->arch.interrupt.injected. +Currently kvm_event_needs_reinjection(vcpu) covers it, but it is +unnecessarily restrictive. + +In split irqchip mode it's a bit more complicated, we need to check +kvm_apic_accept_pic_intr(vcpu) (the IRQ window exit is basically an INTACK +cycle and thus requires ExtINTs not to be masked) as well as +!pending_userspace_extint(vcpu). However, there is no need to +check kvm_event_needs_reinjection(vcpu), since split irqchip keeps +pending ExtINT state separate from event injection state, and checking +kvm_cpu_has_interrupt(vcpu) is wrong too since ExtINT has higher +priority than APIC interrupts. In fact the latter fixes a bug: +when userspace requests an IRQ window vmexit, an interrupt in the +local APIC can cause kvm_cpu_has_interrupt() to be true and thus +kvm_vcpu_ready_for_interrupt_injection() to return false. 
When this +happens, vcpu_run does not exit to userspace but the interrupt window +vmexits keep occurring. The VM loops without any hope of making progress. + +Once we try to fix these with something like + + return kvm_arch_interrupt_allowed(vcpu) && +- !kvm_cpu_has_interrupt(vcpu) && +- !kvm_event_needs_reinjection(vcpu) && +- kvm_cpu_accept_dm_intr(vcpu); ++ (!lapic_in_kernel(vcpu) ++ ? !vcpu->arch.interrupt.injected ++ : (kvm_apic_accept_pic_intr(vcpu) ++ && !pending_userspace_extint(v))); + +we realize two things. First, thanks to the previous patch the complex +conditional can reuse !kvm_cpu_has_extint(vcpu). Second, the interrupt +window request in vcpu_enter_guest() + + bool req_int_win = + dm_request_for_irq_injection(vcpu) && + kvm_cpu_accept_dm_intr(vcpu); + +should be kept in sync with kvm_vcpu_ready_for_interrupt_injection(): +it is unnecessary to ask the processor for an interrupt window +if we would not be able to return to userspace. Therefore, +kvm_cpu_accept_dm_intr(vcpu) is basically !kvm_cpu_has_extint(vcpu) +ANDed with the existing check for masked ExtINT. It all makes sense: + +- we can accept an interrupt from userspace if there is a place + to stash it (and, for irqchip split, ExtINTs are not masked). + Interrupts from userspace _can_ be accepted even if right now + EFLAGS.IF=0. + +- in order to tell userspace we will inject its interrupt ("IRQ + window open" i.e. kvm_vcpu_ready_for_interrupt_injection), both + KVM and the vCPU need to be ready to accept the interrupt. + +... and this is what the patch implements. 
+ +Reported-by: David Woodhouse +Analyzed-by: David Woodhouse +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Reviewed-by: Nikos Tsironis +Reviewed-by: David Woodhouse +Tested-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/kvm_host.h | 1 + + arch/x86/kvm/irq.c | 2 +- + arch/x86/kvm/x86.c | 18 ++++++++++-------- + 3 files changed, 12 insertions(+), 9 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1395,6 +1395,7 @@ int kvm_test_age_hva(struct kvm *kvm, un + void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); + int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); + int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); ++int kvm_cpu_has_extint(struct kvm_vcpu *v); + int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); + int kvm_cpu_get_interrupt(struct kvm_vcpu *v); + void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); +--- a/arch/x86/kvm/irq.c ++++ b/arch/x86/kvm/irq.c +@@ -52,7 +52,7 @@ static int pending_userspace_extint(stru + * check if there is pending interrupt from + * non-APIC source without intack. + */ +-static int kvm_cpu_has_extint(struct kvm_vcpu *v) ++int kvm_cpu_has_extint(struct kvm_vcpu *v) + { + u8 accept = kvm_apic_accept_pic_intr(v); + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -3144,21 +3144,23 @@ static int kvm_vcpu_ioctl_set_lapic(stru + + static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) + { ++ /* ++ * We can accept userspace's request for interrupt injection ++ * as long as we have a place to store the interrupt number. ++ * The actual injection will happen when the CPU is able to ++ * deliver the interrupt. ++ */ ++ if (kvm_cpu_has_extint(vcpu)) ++ return false; ++ ++ /* Acknowledging ExtINT does not happen if LINT0 is masked. 
*/ + return (!lapic_in_kernel(vcpu) || + kvm_apic_accept_pic_intr(vcpu)); + } + +-/* +- * if userspace requested an interrupt window, check that the +- * interrupt window is open. +- * +- * No need to exit to userspace if we already have an interrupt queued. +- */ + static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) + { + return kvm_arch_interrupt_allowed(vcpu) && +- !kvm_cpu_has_interrupt(vcpu) && +- !kvm_event_needs_reinjection(vcpu) && + kvm_cpu_accept_dm_intr(vcpu); + } + diff --git a/queue-4.14/series b/queue-4.14/series index cfb6f8f19df..bfc15fec554 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -6,3 +6,6 @@ pci-add-device-even-if-driver-attach-failed.patch btrfs-tree-checker-enhance-chunk-checker-to-validate-chunk-profile.patch btrfs-adjust-return-values-of-btrfs_inode_by_name.patch btrfs-inode-verify-inode-mode-to-avoid-null-pointer-dereference.patch +kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch +arm64-pgtable-fix-pte_accessible.patch +arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch