From: Greg Kroah-Hartman
Date: Sat, 28 Nov 2020 12:52:07 +0000 (+0100)
Subject: 5.4-stable patches
X-Git-Tag: v4.4.247~30
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=348688c391bcb16c91aba6cd926905a76bfec01a;p=thirdparty%2Fkernel%2Fstable-queue.git

5.4-stable patches

added patches:
	arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch
	arm64-pgtable-fix-pte_accessible.patch
	kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch
	kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch
	kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
	kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch
	trace-fix-potenial-dangerous-pointer.patch
---

diff --git a/queue-5.4/arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch b/queue-5.4/arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch
new file mode 100644
index 00000000000..436c4d8c145
--- /dev/null
+++ b/queue-5.4/arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch
@@ -0,0 +1,77 @@
+From ff1712f953e27f0b0718762ec17d0adb15c9fd0b Mon Sep 17 00:00:00 2001
+From: Will Deacon
+Date: Fri, 20 Nov 2020 13:57:48 +0000
+Subject: arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()
+
+From: Will Deacon
+
+commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b upstream.
+
+With hardware dirty bit management, calling pte_wrprotect() on a writable,
+dirty PTE will lose the dirty state and return a read-only, clean entry.
+
+Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that
+the dirty bit is preserved for writable entries, as this is required for
+soft-dirty bit management if we enable it in the future.
+
+Cc:
+Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits")
+Reviewed-by: Catalin Marinas
+Link: https://lore.kernel.org/r/20201120143557.6715-3-will@kernel.org
+Signed-off-by: Will Deacon
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arm64/include/asm/pgtable.h |   27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -136,13 +136,6 @@ static inline pte_t set_pte_bit(pte_t pt
+ 	return pte;
+ }
+
+-static inline pte_t pte_wrprotect(pte_t pte)
+-{
+-	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+-	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+-	return pte;
+-}
+-
+ static inline pte_t pte_mkwrite(pte_t pte)
+ {
+ 	pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
+@@ -168,6 +161,20 @@ static inline pte_t pte_mkdirty(pte_t pt
+ 	return pte;
+ }
+
++static inline pte_t pte_wrprotect(pte_t pte)
++{
++	/*
++	 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
++	 * clear), set the PTE_DIRTY bit.
++	 */
++	if (pte_hw_dirty(pte))
++		pte = pte_mkdirty(pte);
++
++	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
++	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
++	return pte;
++}
++
+ static inline pte_t pte_mkold(pte_t pte)
+ {
+ 	return clear_pte_bit(pte, __pgprot(PTE_AF));
+@@ -783,12 +790,6 @@ static inline void ptep_set_wrprotect(st
+ 	pte = READ_ONCE(*ptep);
+ 	do {
+ 		old_pte = pte;
+-		/*
+-		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+-		 * clear), set the PTE_DIRTY bit.
+-		 */
+-		if (pte_hw_dirty(pte))
+-			pte = pte_mkdirty(pte);
+ 		pte = pte_wrprotect(pte);
+ 		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+ 					       pte_val(old_pte), pte_val(pte));
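The crux of the change above is ordering: on arm64 with hardware dirty-bit
management, "hardware dirty" is encoded as the DBM/PTE_WRITE bit set while
PTE_RDONLY is clear, so the dirtiness has to be latched into the software
PTE_DIRTY bit before the protection bits are rewritten. A stand-alone model
of that bit dance (a sketch only; the bit positions are illustrative, not
the real arm64 PTE layout):

    #include <stdbool.h>
    #include <stdio.h>

    #define PTE_WRITE  (1u << 0)   /* doubles as the DBM bit under HW DBM */
    #define PTE_RDONLY (1u << 1)
    #define PTE_DIRTY  (1u << 2)   /* software dirty bit */

    static bool pte_hw_dirty(unsigned int pte)
    {
        return (pte & PTE_WRITE) && !(pte & PTE_RDONLY);
    }

    static unsigned int pte_wrprotect(unsigned int pte)
    {
        if (pte_hw_dirty(pte))  /* latch before the bits below change */
            pte |= PTE_DIRTY;
        pte &= ~PTE_WRITE;
        pte |= PTE_RDONLY;
        return pte;
    }

    int main(void)
    {
        unsigned int pte = PTE_WRITE;   /* writable, hardware-dirty */

        printf("dirty preserved: %s\n",
               pte_wrprotect(pte) & PTE_DIRTY ? "yes" : "no");
        return 0;
    }

Drop the pte_hw_dirty() latch and the same input prints "no" — exactly the
lost-dirty-state bug the patch above describes.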
diff --git a/queue-5.4/arm64-pgtable-fix-pte_accessible.patch b/queue-5.4/arm64-pgtable-fix-pte_accessible.patch
new file mode 100644
index 00000000000..ea2c3d417bf
--- /dev/null
+++ b/queue-5.4/arm64-pgtable-fix-pte_accessible.patch
@@ -0,0 +1,59 @@
+From 07509e10dcc77627f8b6a57381e878fe269958d3 Mon Sep 17 00:00:00 2001
+From: Will Deacon
+Date: Fri, 20 Nov 2020 13:28:01 +0000
+Subject: arm64: pgtable: Fix pte_accessible()
+
+From: Will Deacon
+
+commit 07509e10dcc77627f8b6a57381e878fe269958d3 upstream.
+
+pte_accessible() is used by ptep_clear_flush() to figure out whether TLB
+invalidation is necessary when unmapping pages for reclaim. Although our
+implementation is correct according to the architecture, returning true
+only for valid, young ptes in the absence of racing page-table
+modifications, this is in fact flawed due to lazy invalidation of old
+ptes in ptep_clear_flush_young() where we elide the expensive DSB
+instruction for completing the TLB invalidation.
+
+Rather than penalise the aging path, adjust pte_accessible() to return
+true for any valid pte, even if the access flag is cleared.
+
+Cc:
+Fixes: 76c714be0e5e ("arm64: pgtable: implement pte_accessible()")
+Reported-by: Yu Zhao
+Acked-by: Yu Zhao
+Reviewed-by: Minchan Kim
+Reviewed-by: Catalin Marinas
+Link: https://lore.kernel.org/r/20201120143557.6715-2-will@kernel.org
+Signed-off-by: Will Deacon
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arm64/include/asm/pgtable.h |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -98,8 +98,6 @@ extern unsigned long empty_zero_page[PAG
+ #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
+ #define pte_valid_not_user(pte) \
+ 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+-#define pte_valid_young(pte) \
+-	((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
+ #define pte_valid_user(pte) \
+ 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
+
+@@ -107,9 +105,12 @@ extern unsigned long empty_zero_page[PAG
+  * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
+  * so that we don't erroneously return false for pages that have been
+  * remapped as PROT_NONE but are yet to be flushed from the TLB.
++ * Note that we can't make any assumptions based on the state of the access
++ * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
++ * TLB.
+  */
+ #define pte_accessible(mm, pte)	\
+-	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
++	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
+
+ /*
+  * p??_access_permitted() is true for valid user mappings (subject to the
diff --git a/queue-5.4/kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch b/queue-5.4/kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch
new file mode 100644
index 00000000000..b190201d28a
--- /dev/null
+++ b/queue-5.4/kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch
@@ -0,0 +1,79 @@
+From 23bde34771f1ea92fb5e6682c0d8c04304d34b3b Mon Sep 17 00:00:00 2001
+From: Zenghui Yu
+Date: Tue, 17 Nov 2020 23:16:29 +0800
+Subject: KVM: arm64: vgic-v3: Drop the reporting of GICR_TYPER.Last for userspace
+
+From: Zenghui Yu
+
+commit 23bde34771f1ea92fb5e6682c0d8c04304d34b3b upstream.
+
+It was recently reported that if GICR_TYPER is accessed before the RD base
+address is set, we'll suffer from the unset @rdreg dereferencing. Oops...
+
+	gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
+			(rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
+
+It's "expected" that users will access registers in the redistributor if
+the RD has been properly configured (e.g., the RD base address is set). But
+it hasn't yet been covered by the existing documentation.
+
+Per discussion on the list [1], the reporting of the GICR_TYPER.Last bit
+for userspace never actually worked. And it's difficult for us to emulate
+it correctly given that userspace has the flexibility to access it any
+time. Let's just drop the reporting of the Last bit for userspace for now
+(userspace should have full knowledge about it anyway) and it at least
+prevents the kernel from panicking ;-)
+
+[1] https://lore.kernel.org/kvmarm/c20865a267e44d1e2c0d52ce4e012263@kernel.org/
+
+Fixes: ba7b3f1275fd ("KVM: arm/arm64: Revisit Redistributor TYPER last bit computation")
+Reported-by: Keqian Zhu
+Signed-off-by: Zenghui Yu
+Signed-off-by: Marc Zyngier
+Reviewed-by: Eric Auger
+Link: https://lore.kernel.org/r/20201117151629.1738-1-yuzenghui@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ virt/kvm/arm/vgic/vgic-mmio-v3.c |   22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
++++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
+@@ -223,6 +223,23 @@ static unsigned long vgic_mmio_read_v3r_
+ 	return extract_bytes(value, addr & 7, len);
+ }
+
++static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu,
++						 gpa_t addr, unsigned int len)
++{
++	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
++	int target_vcpu_id = vcpu->vcpu_id;
++	u64 value;
++
++	value = (u64)(mpidr & GENMASK(23, 0)) << 32;
++	value |= ((target_vcpu_id & 0xffff) << 8);
++
++	if (vgic_has_its(vcpu->kvm))
++		value |= GICR_TYPER_PLPIS;
++
++	/* reporting of the Last bit is not supported for userspace */
++	return extract_bytes(value, addr & 7, len);
++}
++
+ static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
+ 					     gpa_t addr, unsigned int len)
+ {
+@@ -528,8 +545,9 @@ static const struct vgic_register_region
+ 	REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
+ 		vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
+ 		VGIC_ACCESS_32bit),
+-	REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
+-		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
++	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER,
++		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
++		vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
+ 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ 	REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
+ 		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
diff --git a/queue-5.4/kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch b/queue-5.4/kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch
new file mode 100644
index 00000000000..63d0e070454
--- /dev/null
+++ b/queue-5.4/kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch
@@ -0,0 +1,80 @@
+From 75b49620267c700f0a07fec7f27f69852db70e46 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?=
+Date: Thu, 5 Nov 2020 14:47:13 +0100
+Subject: KVM: PPC: Book3S HV: XIVE: Fix possible oops when accessing ESB page
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cédric Le Goater
+
+commit 75b49620267c700f0a07fec7f27f69852db70e46 upstream.
+
+When accessing the ESB page of a source interrupt, the fault handler
+will retrieve the page address from the XIVE interrupt 'xive_irq_data'
+structure. If the associated KVM XIVE interrupt is not valid, that is,
+not allocated at the HW level for some reason, the fault handler will
+dereference a NULL pointer leading to the oops below:
+
+ WARNING: CPU: 40 PID: 59101 at arch/powerpc/kvm/book3s_xive_native.c:259 xive_native_esb_fault+0xe4/0x240 [kvm]
+ CPU: 40 PID: 59101 Comm: qemu-system-ppc Kdump: loaded Tainted: G W --------- - - 4.18.0-240.el8.ppc64le #1
+ NIP: c00800000e949fac LR: c00000000044b164 CTR: c00800000e949ec8
+ REGS: c000001f69617840 TRAP: 0700 Tainted: G W --------- - - (4.18.0-240.el8.ppc64le)
+ MSR: 9000000000029033 CR: 44044282 XER: 00000000
+ CFAR: c00000000044b160 IRQMASK: 0
+ GPR00: c00000000044b164 c000001f69617ac0 c00800000e96e000 c000001f69617c10
+ GPR04: 05faa2b21e000080 0000000000000000 0000000000000005 ffffffffffffffff
+ GPR08: 0000000000000000 0000000000000001 0000000000000000 0000000000000001
+ GPR12: c00800000e949ec8 c000001ffffd3400 0000000000000000 0000000000000000
+ GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
+ GPR20: 0000000000000000 0000000000000000 c000001f5c065160 c000000001c76f90
+ GPR24: c000001f06f20000 c000001f5c065100 0000000000000008 c000001f0eb98c78
+ GPR28: c000001dcab40000 c000001dcab403d8 c000001f69617c10 0000000000000011
+ NIP [c00800000e949fac] xive_native_esb_fault+0xe4/0x240 [kvm]
+ LR [c00000000044b164] __do_fault+0x64/0x220
+ Call Trace:
+ [c000001f69617ac0] [0000000137a5dc20] 0x137a5dc20 (unreliable)
+ [c000001f69617b50] [c00000000044b164] __do_fault+0x64/0x220
+ [c000001f69617b90] [c000000000453838] do_fault+0x218/0x930
+ [c000001f69617bf0] [c000000000456f50] __handle_mm_fault+0x350/0xdf0
+ [c000001f69617cd0] [c000000000457b1c] handle_mm_fault+0x12c/0x310
+ [c000001f69617d10] [c00000000007ef44] __do_page_fault+0x264/0xbb0
+ [c000001f69617df0] [c00000000007f8c8] do_page_fault+0x38/0xd0
+ [c000001f69617e30] [c00000000000a714] handle_page_fault+0x18/0x38
+ Instruction dump:
+ 40c2fff0 7c2004ac 2fa90000 409e0118 73e90001 41820080 e8bd0008 7c2004ac
+ 7ca90074 39400000 915c0000 7929d182 <0b090000> 2fa50000 419e0080 e89e0018
+ ---[ end trace 66c6ff034c53f64f ]---
+ xive-kvm: xive_native_esb_fault: accessing invalid ESB page for source 8 !
+
+Fix that by checking the validity of the KVM XIVE interrupt structure.
+
+Fixes: 6520ca64cde7 ("KVM: PPC: Book3S HV: XIVE: Add a mapping for the source ESB pages")
+Cc: stable@vger.kernel.org # v5.2+
+Reported-by: Greg Kurz
+Signed-off-by: Cédric Le Goater
+Tested-by: Greg Kurz
+Signed-off-by: Michael Ellerman
+Link: https://lore.kernel.org/r/20201105134713.656160-1-clg@kaod.org
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/powerpc/kvm/book3s_xive_native.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/powerpc/kvm/book3s_xive_native.c
++++ b/arch/powerpc/kvm/book3s_xive_native.c
+@@ -252,6 +252,13 @@ static vm_fault_t xive_native_esb_fault(
+ 	}
+
+ 	state = &sb->irq_state[src];
++
++	/* Some sanity checking */
++	if (!state->valid) {
++		pr_devel("%s: source %lx invalid !\n", __func__, irq);
++		return VM_FAULT_SIGBUS;
++	}
++
+ 	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ 	arch_spin_lock(&sb->lock);
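The pattern in the fix above is general: a fault handler that derives a
hardware pointer from per-source state must refuse the access when that
state was never populated, rather than dereference it. A minimal model of
the guard (hypothetical names throughout; this is not the kernel code):

    #include <stddef.h>

    struct demo_irq_data { unsigned long esb_page; };
    struct demo_source  { int valid; struct demo_irq_data *xd; };

    /* Return the backing page address, or 0 so the caller can raise
     * SIGBUS; before the fix, state->xd (NULL here) was dereferenced. */
    static unsigned long demo_esb_fault(const struct demo_source *state)
    {
        if (!state->valid)      /* never allocated at the HW level */
            return 0;
        return state->xd->esb_page;
    }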
diff --git a/queue-5.4/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch b/queue-5.4/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
new file mode 100644
index 00000000000..84537c20dce
--- /dev/null
+++ b/queue-5.4/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
@@ -0,0 +1,139 @@
+From 71cc849b7093bb83af966c0e60cb11b7f35cd746 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini
+Date: Fri, 27 Nov 2020 09:18:20 +0100
+Subject: KVM: x86: Fix split-irqchip vs interrupt injection window request
+
+From: Paolo Bonzini
+
+commit 71cc849b7093bb83af966c0e60cb11b7f35cd746 upstream.
+
+kvm_cpu_accept_dm_intr and kvm_vcpu_ready_for_interrupt_injection are
+a hodge-podge of conditions, hacked together to get something that
+more or less works. But what is actually needed is much simpler;
+in both cases the fundamental question is, do we have a place to stash
+an interrupt if userspace does KVM_INTERRUPT?
+
+In userspace irqchip mode, that is !vcpu->arch.interrupt.injected.
+Currently kvm_event_needs_reinjection(vcpu) covers it, but it is
+unnecessarily restrictive.
+
+In split irqchip mode it's a bit more complicated, we need to check
+kvm_apic_accept_pic_intr(vcpu) (the IRQ window exit is basically an INTACK
+cycle and thus requires ExtINTs not to be masked) as well as
+!pending_userspace_extint(vcpu). However, there is no need to
+check kvm_event_needs_reinjection(vcpu), since split irqchip keeps
+pending ExtINT state separate from event injection state, and checking
+kvm_cpu_has_interrupt(vcpu) is wrong too since ExtINT has higher
+priority than APIC interrupts. In fact the latter fixes a bug:
+when userspace requests an IRQ window vmexit, an interrupt in the
+local APIC can cause kvm_cpu_has_interrupt() to be true and thus
+kvm_vcpu_ready_for_interrupt_injection() to return false. When this
+happens, vcpu_run does not exit to userspace but the interrupt window
+vmexits keep occurring. The VM loops without any hope of making progress.
+
+Once we try to fix these with something like
+
+   return kvm_arch_interrupt_allowed(vcpu) &&
+-         !kvm_cpu_has_interrupt(vcpu) &&
+-         !kvm_event_needs_reinjection(vcpu) &&
+-         kvm_cpu_accept_dm_intr(vcpu);
++        (!lapic_in_kernel(vcpu)
++           ? !vcpu->arch.interrupt.injected
++           : (kvm_apic_accept_pic_intr(vcpu)
++              && !pending_userspace_extint(v)));
+
+we realize two things. First, thanks to the previous patch the complex
+conditional can reuse !kvm_cpu_has_extint(vcpu). Second, the interrupt
+window request in vcpu_enter_guest()
+
+   bool req_int_win =
+       dm_request_for_irq_injection(vcpu) &&
+       kvm_cpu_accept_dm_intr(vcpu);
+
+should be kept in sync with kvm_vcpu_ready_for_interrupt_injection():
+it is unnecessary to ask the processor for an interrupt window
+if we would not be able to return to userspace. Therefore,
+kvm_cpu_accept_dm_intr(vcpu) is basically !kvm_cpu_has_extint(vcpu)
+ANDed with the existing check for masked ExtINT. It all makes sense:
+
+- we can accept an interrupt from userspace if there is a place
+  to stash it (and, for irqchip split, ExtINTs are not masked).
+  Interrupts from userspace _can_ be accepted even if right now
+  EFLAGS.IF=0.
+
+- in order to tell userspace we will inject its interrupt ("IRQ
+  window open" i.e. kvm_vcpu_ready_for_interrupt_injection), both
+  KVM and the vCPU need to be ready to accept the interrupt.
+
+... and this is what the patch implements.
+
+Reported-by: David Woodhouse
+Analyzed-by: David Woodhouse
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini
+Reviewed-by: Nikos Tsironis
+Reviewed-by: David Woodhouse
+Tested-by: David Woodhouse
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/include/asm/kvm_host.h |    1 +
+ arch/x86/kvm/irq.c              |    2 +-
+ arch/x86/kvm/x86.c              |   18 ++++++++++--------
+ 3 files changed, 12 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1560,6 +1560,7 @@ int kvm_test_age_hva(struct kvm *kvm, un
+ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
+ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
++int kvm_cpu_has_extint(struct kvm_vcpu *v);
+ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
+--- a/arch/x86/kvm/irq.c
++++ b/arch/x86/kvm/irq.c
+@@ -40,7 +40,7 @@ static int pending_userspace_extint(stru
+  * check if there is pending interrupt from
+  * non-APIC source without intack.
+  */
+-static int kvm_cpu_has_extint(struct kvm_vcpu *v)
++int kvm_cpu_has_extint(struct kvm_vcpu *v)
+ {
+ 	/*
+ 	 * FIXME: interrupt.injected represents an interrupt whose
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3624,21 +3624,23 @@ static int kvm_vcpu_ioctl_set_lapic(stru
+
+ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
+ {
++	/*
++	 * We can accept userspace's request for interrupt injection
++	 * as long as we have a place to store the interrupt number.
++	 * The actual injection will happen when the CPU is able to
++	 * deliver the interrupt.
++	 */
++	if (kvm_cpu_has_extint(vcpu))
++		return false;
++
++	/* Acknowledging ExtINT does not happen if LINT0 is masked. */
+ 	return (!lapic_in_kernel(vcpu) ||
+ 		kvm_apic_accept_pic_intr(vcpu));
+ }
+
+-/*
+- * if userspace requested an interrupt window, check that the
+- * interrupt window is open.
+- *
+- * No need to exit to userspace if we already have an interrupt queued.
+- */
+ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
+ {
+ 	return kvm_arch_interrupt_allowed(vcpu) &&
+-		!kvm_cpu_has_interrupt(vcpu) &&
+-		!kvm_event_needs_reinjection(vcpu) &&
+ 		kvm_cpu_accept_dm_intr(vcpu);
+ }
+
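Reduced to plain predicates, the x86.c hunk above implements a small
decision table: a userspace interrupt is accepted iff no ExtINT is already
stashed and, with an in-kernel LAPIC, ExtINT acknowledgement is possible;
the IRQ window is reported open iff injection is architecturally allowed
and the interrupt would be accepted. A sketch with booleans standing in for
the kernel predicates (the names here are stand-ins, not KVM APIs):

    #include <stdbool.h>

    /* extint_pending ~ kvm_cpu_has_extint(vcpu)
     * lapic_in_kern  ~ lapic_in_kernel(vcpu)
     * pic_intr_ok    ~ kvm_apic_accept_pic_intr(vcpu) */
    static bool accept_dm_intr(bool extint_pending, bool lapic_in_kern,
                               bool pic_intr_ok)
    {
        if (extint_pending)             /* stash slot already occupied */
            return false;
        return !lapic_in_kern || pic_intr_ok;
    }

    static bool ready_for_injection(bool irq_allowed, bool extint_pending,
                                    bool lapic_in_kern, bool pic_intr_ok)
    {
        return irq_allowed &&
               accept_dm_intr(extint_pending, lapic_in_kern, pic_intr_ok);
    }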
diff --git a/queue-5.4/kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch b/queue-5.4/kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch
new file mode 100644
index 00000000000..87018948396
--- /dev/null
+++ b/queue-5.4/kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch
@@ -0,0 +1,163 @@
+From 72c3bcdcda494cbd600712a32e67702cdee60c07 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini
+Date: Fri, 27 Nov 2020 08:53:52 +0100
+Subject: KVM: x86: handle !lapic_in_kernel case in kvm_cpu_*_extint
+
+From: Paolo Bonzini
+
+commit 72c3bcdcda494cbd600712a32e67702cdee60c07 upstream.
+
+Centralize handling of interrupts from the userspace APIC
+in kvm_cpu_has_extint and kvm_cpu_get_extint, since
+userspace APIC interrupts are handled more or less the
+same as ExtINTs are with split irqchip. This removes
+duplicated code from kvm_cpu_has_injectable_intr and
+kvm_cpu_has_interrupt, and makes the code more similar
+between kvm_cpu_has_{extint,interrupt} on one side
+and kvm_cpu_get_{extint,interrupt} on the other.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Filippo Sironi
+Reviewed-by: David Woodhouse
+Tested-by: David Woodhouse
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/irq.c   |   83 ++++++++++++++++++++-------------------------------
+ arch/x86/kvm/lapic.c |    2 -
+ 2 files changed, 34 insertions(+), 51 deletions(-)
+
+--- a/arch/x86/kvm/irq.c
++++ b/arch/x86/kvm/irq.c
+@@ -42,27 +42,8 @@ static int pending_userspace_extint(stru
+  */
+ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
+ {
+-	u8 accept = kvm_apic_accept_pic_intr(v);
+-
+-	if (accept) {
+-		if (irqchip_split(v->kvm))
+-			return pending_userspace_extint(v);
+-		else
+-			return v->kvm->arch.vpic->output;
+-	} else
+-		return 0;
+-}
+-
+-/*
+- * check if there is injectable interrupt:
+- * when virtual interrupt delivery enabled,
+- * interrupt from apic will handled by hardware,
+- * we don't need to check it here.
+- */
+-int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
+-{
+ 	/*
+-	 * FIXME: interrupt.injected represents an interrupt that it's
++	 * FIXME: interrupt.injected represents an interrupt whose
+ 	 * side-effects have already been applied (e.g. bit from IRR
+ 	 * already moved to ISR). Therefore, it is incorrect to rely
+ 	 * on interrupt.injected to know if there is a pending
+ 	 * interrupt in the user-mode LAPIC.
+ 	 * This leads to nVMX/nSVM not be able to distinguish
+ 	 * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
+ 	 * pending interrupt or should re-inject an injected
+ 	 * interrupt.
+ 	 */
+ 	if (!lapic_in_kernel(v))
+ 		return v->arch.interrupt.injected;
+
++	if (!kvm_apic_accept_pic_intr(v))
++		return 0;
++
++	if (irqchip_split(v->kvm))
++		return pending_userspace_extint(v);
++	else
++		return v->kvm->arch.vpic->output;
++}
++
++/*
++ * check if there is injectable interrupt:
++ * when virtual interrupt delivery enabled,
++ * interrupt from apic will handled by hardware,
++ * we don't need to check it here.
++ */
++int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
++{
+ 	if (kvm_cpu_has_extint(v))
+ 		return 1;
+
+@@ -90,20 +88,6 @@ int kvm_cpu_has_injectable_intr(struct k
+  */
+ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
+ {
+-	/*
+-	 * FIXME: interrupt.injected represents an interrupt that it's
+-	 * side-effects have already been applied (e.g. bit from IRR
+-	 * already moved to ISR). Therefore, it is incorrect to rely
+-	 * on interrupt.injected to know if there is a pending
+-	 * interrupt in the user-mode LAPIC.
+-	 * This leads to nVMX/nSVM not be able to distinguish
+-	 * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
+-	 * pending interrupt or should re-inject an injected
+-	 * interrupt.
+-	 */
+-	if (!lapic_in_kernel(v))
+-		return v->arch.interrupt.injected;
+-
+ 	if (kvm_cpu_has_extint(v))
+ 		return 1;
+
+@@ -117,16 +101,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt)
+  */
+ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
+ {
+-	if (kvm_cpu_has_extint(v)) {
+-		if (irqchip_split(v->kvm)) {
+-			int vector = v->arch.pending_external_vector;
+-
+-			v->arch.pending_external_vector = -1;
+-			return vector;
+-		} else
+-			return kvm_pic_read_irq(v->kvm); /* PIC */
+-	} else
++	if (!kvm_cpu_has_extint(v)) {
++		WARN_ON(!lapic_in_kernel(v));
+ 		return -1;
++	}
++
++	if (!lapic_in_kernel(v))
++		return v->arch.interrupt.nr;
++
++	if (irqchip_split(v->kvm)) {
++		int vector = v->arch.pending_external_vector;
++
++		v->arch.pending_external_vector = -1;
++		return vector;
++	} else
++		return kvm_pic_read_irq(v->kvm); /* PIC */
+ }
+
+ /*
+@@ -134,13 +123,7 @@ static int kvm_cpu_get_extint(struct kvm
+  */
+ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
+ {
+-	int vector;
+-
+-	if (!lapic_in_kernel(v))
+-		return v->arch.interrupt.nr;
+-
+-	vector = kvm_cpu_get_extint(v);
+-
++	int vector = kvm_cpu_get_extint(v);
+ 	if (vector != -1)
+ 		return vector; /* PIC */
+
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2330,7 +2330,7 @@ int kvm_apic_has_interrupt(struct kvm_vc
+ 	struct kvm_lapic *apic = vcpu->arch.apic;
+ 	u32 ppr;
+
+-	if (!kvm_apic_hw_enabled(apic))
++	if (!kvm_apic_present(vcpu))
+ 		return -1;
+
+ 	__apic_update_ppr(apic, &ppr);
diff --git a/queue-5.4/series b/queue-5.4/series
index 3fa01ec1b0b..701d7526471 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -11,3 +11,10 @@ smb3-call-cifs-reconnect-from-demultiplex-thread.patch
 smb3-avoid-mid-pending-list-corruption.patch
 smb3-handle-error-case-during-offload-read-path.patch
 cifs-fix-a-memleak-with-modefromsid.patch
+kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch
+kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch
+kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch
+kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
+trace-fix-potenial-dangerous-pointer.patch
+arm64-pgtable-fix-pte_accessible.patch
+arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch
diff --git a/queue-5.4/trace-fix-potenial-dangerous-pointer.patch b/queue-5.4/trace-fix-potenial-dangerous-pointer.patch
new file mode 100644
index 00000000000..3423f8b8574
--- /dev/null
+++ b/queue-5.4/trace-fix-potenial-dangerous-pointer.patch
@@ -0,0 +1,65 @@
+From fdeb17c70c9ecae655378761accf5a26a55a33cf Mon Sep 17 00:00:00 2001
+From: Hui Su
+Date: Wed, 25 Nov 2020 00:52:05 +0800
+Subject: trace: fix potenial dangerous pointer
+
+From: Hui Su
+
+commit fdeb17c70c9ecae655378761accf5a26a55a33cf upstream.
+
+The bdi_dev_name() returns a char [64], and
+the __entry->name is a char [32].
+
+It may be dangerous to TP_printk("%s", __entry->name)
+after the strncpy().
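The hazard is easy to reproduce in ordinary userspace C (a stand-alone
demonstration of the strncpy() contract, not kernel code):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char src[64] = "a-backing-device-name-well-over-thirty-two-bytes";
        char dst[32];

        /* strncpy() stops after sizeof(dst) bytes and does NOT
         * NUL-terminate when src is at least that long, so a later
         * "%s" read runs off the end of dst. */
        strncpy(dst, src, sizeof(dst));
        printf("NUL terminator present: %s\n",
               memchr(dst, '\0', sizeof(dst)) ? "yes" : "no");
        return 0;
    }

Compiled and run, this prints "no": the 32-byte destination holds no
terminator at all, which is exactly the state TP_printk("%s", ...) would
then read.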
+
+CC: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20201124165205.GA23937@rlk
+Acked-by: Steven Rostedt (VMware)
+Acked-by: Tejun Heo
+Signed-off-by: Hui Su
+Signed-off-by: Jan Kara
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/trace/events/writeback.h |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/include/trace/events/writeback.h
++++ b/include/trace/events/writeback.h
+@@ -192,7 +192,7 @@ TRACE_EVENT(inode_foreign_history,
+ 	),
+
+ 	TP_fast_assign(
+-		strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
++		strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
+ 		__entry->ino = inode->i_ino;
+ 		__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
+ 		__entry->history = history;
+@@ -221,7 +221,7 @@ TRACE_EVENT(inode_switch_wbs,
+ 	),
+
+ 	TP_fast_assign(
+-		strncpy(__entry->name, bdi_dev_name(old_wb->bdi), 32);
++		strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32);
+ 		__entry->ino = inode->i_ino;
+ 		__entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
+ 		__entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
+@@ -254,7 +254,7 @@ TRACE_EVENT(track_foreign_dirty,
+ 		struct address_space *mapping = page_mapping(page);
+ 		struct inode *inode = mapping ? mapping->host : NULL;
+
+-		strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
++		strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
+ 		__entry->bdi_id = wb->bdi->id;
+ 		__entry->ino = inode ? inode->i_ino : 0;
+ 		__entry->memcg_id = wb->memcg_css->id;
+@@ -287,7 +287,7 @@ TRACE_EVENT(flush_foreign,
+ 	),
+
+ 	TP_fast_assign(
+-		strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
++		strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
+ 		__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
+ 		__entry->frn_bdi_id = frn_bdi_id;
+ 		__entry->frn_memcg_id = frn_memcg_id;
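For reference, strscpy_pad() both NUL-terminates and zero-fills the rest of
the destination, so no stale stack bytes leak into the trace ring buffer
either. A rough userspace model of its copying behaviour (the kernel
version additionally returns -E2BIG on truncation; count is assumed to be
at least 1 here):

    #include <string.h>

    static size_t strscpy_pad_model(char *dst, const char *src, size_t count)
    {
        size_t len = strnlen(src, count - 1);   /* truncate to fit */

        memcpy(dst, src, len);
        memset(dst + len, 0, count - len);      /* NUL + zero padding */
        return len;
    }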