git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 28 Nov 2020 12:52:07 +0000 (13:52 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 28 Nov 2020 12:52:07 +0000 (13:52 +0100)
added patches:
arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch
arm64-pgtable-fix-pte_accessible.patch
kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch
kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch
kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch
trace-fix-potenial-dangerous-pointer.patch

queue-5.4/arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch [new file with mode: 0644]
queue-5.4/arm64-pgtable-fix-pte_accessible.patch [new file with mode: 0644]
queue-5.4/kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch [new file with mode: 0644]
queue-5.4/kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch [new file with mode: 0644]
queue-5.4/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch [new file with mode: 0644]
queue-5.4/kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/trace-fix-potenial-dangerous-pointer.patch [new file with mode: 0644]

diff --git a/queue-5.4/arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch b/queue-5.4/arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch
new file mode 100644 (file)
index 0000000..436c4d8
--- /dev/null
@@ -0,0 +1,77 @@
+From ff1712f953e27f0b0718762ec17d0adb15c9fd0b Mon Sep 17 00:00:00 2001
+From: Will Deacon <will@kernel.org>
+Date: Fri, 20 Nov 2020 13:57:48 +0000
+Subject: arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()
+
+From: Will Deacon <will@kernel.org>
+
+commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b upstream.
+
+With hardware dirty bit management, calling pte_wrprotect() on a writable,
+dirty PTE will lose the dirty state and return a read-only, clean entry.
+
+Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that
+the dirty bit is preserved for writable entries, as this is required for
+soft-dirty bit management if we enable it in the future.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits")
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Link: https://lore.kernel.org/r/20201120143557.6715-3-will@kernel.org
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/include/asm/pgtable.h |   27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -136,13 +136,6 @@ static inline pte_t set_pte_bit(pte_t pt
+       return pte;
+ }
+-static inline pte_t pte_wrprotect(pte_t pte)
+-{
+-      pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+-      pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+-      return pte;
+-}
+-
+ static inline pte_t pte_mkwrite(pte_t pte)
+ {
+       pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
+@@ -168,6 +161,20 @@ static inline pte_t pte_mkdirty(pte_t pt
+       return pte;
+ }
++static inline pte_t pte_wrprotect(pte_t pte)
++{
++      /*
++       * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
++       * clear), set the PTE_DIRTY bit.
++       */
++      if (pte_hw_dirty(pte))
++              pte = pte_mkdirty(pte);
++
++      pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
++      pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
++      return pte;
++}
++
+ static inline pte_t pte_mkold(pte_t pte)
+ {
+       return clear_pte_bit(pte, __pgprot(PTE_AF));
+@@ -783,12 +790,6 @@ static inline void ptep_set_wrprotect(st
+       pte = READ_ONCE(*ptep);
+       do {
+               old_pte = pte;
+-              /*
+-               * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+-               * clear), set the PTE_DIRTY bit.
+-               */
+-              if (pte_hw_dirty(pte))
+-                      pte = pte_mkdirty(pte);
+               pte = pte_wrprotect(pte);
+               pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+                                              pte_val(old_pte), pte_val(pte));
diff --git a/queue-5.4/arm64-pgtable-fix-pte_accessible.patch b/queue-5.4/arm64-pgtable-fix-pte_accessible.patch
new file mode 100644 (file)
index 0000000..ea2c3d4
--- /dev/null
@@ -0,0 +1,59 @@
+From 07509e10dcc77627f8b6a57381e878fe269958d3 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will@kernel.org>
+Date: Fri, 20 Nov 2020 13:28:01 +0000
+Subject: arm64: pgtable: Fix pte_accessible()
+
+From: Will Deacon <will@kernel.org>
+
+commit 07509e10dcc77627f8b6a57381e878fe269958d3 upstream.
+
+pte_accessible() is used by ptep_clear_flush() to figure out whether TLB
+invalidation is necessary when unmapping pages for reclaim. Although our
+implementation is correct according to the architecture, returning true
+only for valid, young ptes in the absence of racing page-table
+modifications, this is in fact flawed due to lazy invalidation of old
+ptes in ptep_clear_flush_young() where we elide the expensive DSB
+instruction for completing the TLB invalidation.
+
+Rather than penalise the aging path, adjust pte_accessible() to return
+true for any valid pte, even if the access flag is cleared.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 76c714be0e5e ("arm64: pgtable: implement pte_accessible()")
+Reported-by: Yu Zhao <yuzhao@google.com>
+Acked-by: Yu Zhao <yuzhao@google.com>
+Reviewed-by: Minchan Kim <minchan@kernel.org>
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Link: https://lore.kernel.org/r/20201120143557.6715-2-will@kernel.org
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/include/asm/pgtable.h |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -98,8 +98,6 @@ extern unsigned long empty_zero_page[PAG
+ #define pte_valid(pte)                (!!(pte_val(pte) & PTE_VALID))
+ #define pte_valid_not_user(pte) \
+       ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+-#define pte_valid_young(pte) \
+-      ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
+ #define pte_valid_user(pte) \
+       ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
+@@ -107,9 +105,12 @@ extern unsigned long empty_zero_page[PAG
+  * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
+  * so that we don't erroneously return false for pages that have been
+  * remapped as PROT_NONE but are yet to be flushed from the TLB.
++ * Note that we can't make any assumptions based on the state of the access
++ * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
++ * TLB.
+  */
+ #define pte_accessible(mm, pte)       \
+-      (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
++      (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
+ /*
+  * p??_access_permitted() is true for valid user mappings (subject to the
diff --git a/queue-5.4/kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch b/queue-5.4/kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch
new file mode 100644 (file)
index 0000000..b190201
--- /dev/null
@@ -0,0 +1,79 @@
+From 23bde34771f1ea92fb5e6682c0d8c04304d34b3b Mon Sep 17 00:00:00 2001
+From: Zenghui Yu <yuzenghui@huawei.com>
+Date: Tue, 17 Nov 2020 23:16:29 +0800
+Subject: KVM: arm64: vgic-v3: Drop the reporting of GICR_TYPER.Last for userspace
+
+From: Zenghui Yu <yuzenghui@huawei.com>
+
+commit 23bde34771f1ea92fb5e6682c0d8c04304d34b3b upstream.
+
+It was recently reported that if GICR_TYPER is accessed before the RD base
+address is set, we'll suffer from the unset @rdreg dereferencing. Oops...
+
+       gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
+                       (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
+
+It's "expected" that users will access registers in the redistributor if
+the RD has been properly configured (e.g., the RD base address is set). But
+it hasn't yet been covered by the existing documentation.
+
+Per discussion on the list [1], the reporting of the GICR_TYPER.Last bit
+for userspace never actually worked. And it's difficult for us to emulate
+it correctly given that userspace has the flexibility to access it any
+time. Let's just drop the reporting of the Last bit for userspace for now
+(userspace should have full knowledge about it anyway) and it at least
+prevents kernel from panic ;-)
+
+[1] https://lore.kernel.org/kvmarm/c20865a267e44d1e2c0d52ce4e012263@kernel.org/
+
+Fixes: ba7b3f1275fd ("KVM: arm/arm64: Revisit Redistributor TYPER last bit computation")
+Reported-by: Keqian Zhu <zhukeqian1@huawei.com>
+Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Reviewed-by: Eric Auger <eric.auger@redhat.com>
+Link: https://lore.kernel.org/r/20201117151629.1738-1-yuzenghui@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ virt/kvm/arm/vgic/vgic-mmio-v3.c |   22 ++++++++++++++++++++--
+ 1 file changed, 20 insertions(+), 2 deletions(-)
+
+--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
++++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
+@@ -223,6 +223,23 @@ static unsigned long vgic_mmio_read_v3r_
+       return extract_bytes(value, addr & 7, len);
+ }
++static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu,
++                                               gpa_t addr, unsigned int len)
++{
++      unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
++      int target_vcpu_id = vcpu->vcpu_id;
++      u64 value;
++
++      value = (u64)(mpidr & GENMASK(23, 0)) << 32;
++      value |= ((target_vcpu_id & 0xffff) << 8);
++
++      if (vgic_has_its(vcpu->kvm))
++              value |= GICR_TYPER_PLPIS;
++
++      /* reporting of the Last bit is not supported for userspace */
++      return extract_bytes(value, addr & 7, len);
++}
++
+ static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
+                                            gpa_t addr, unsigned int len)
+ {
+@@ -528,8 +545,9 @@ static const struct vgic_register_region
+       REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
+               vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
+               VGIC_ACCESS_32bit),
+-      REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
+-              vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
++      REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER,
++              vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
++              vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
+               VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
+               vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
diff --git a/queue-5.4/kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch b/queue-5.4/kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch
new file mode 100644 (file)
index 0000000..63d0e07
--- /dev/null
@@ -0,0 +1,80 @@
+From 75b49620267c700f0a07fec7f27f69852db70e46 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
+Date: Thu, 5 Nov 2020 14:47:13 +0100
+Subject: KVM: PPC: Book3S HV: XIVE: Fix possible oops when accessing ESB page
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cédric Le Goater <clg@kaod.org>
+
+commit 75b49620267c700f0a07fec7f27f69852db70e46 upstream.
+
+When accessing the ESB page of a source interrupt, the fault handler
+will retrieve the page address from the XIVE interrupt 'xive_irq_data'
+structure. If the associated KVM XIVE interrupt is not valid, that is
+not allocated at the HW level for some reason, the fault handler will
+dereference a NULL pointer leading to the oops below :
+
+  WARNING: CPU: 40 PID: 59101 at arch/powerpc/kvm/book3s_xive_native.c:259 xive_native_esb_fault+0xe4/0x240 [kvm]
+  CPU: 40 PID: 59101 Comm: qemu-system-ppc Kdump: loaded Tainted: G        W        --------- -  - 4.18.0-240.el8.ppc64le #1
+  NIP:  c00800000e949fac LR: c00000000044b164 CTR: c00800000e949ec8
+  REGS: c000001f69617840 TRAP: 0700   Tainted: G        W        --------- -  -  (4.18.0-240.el8.ppc64le)
+  MSR:  9000000000029033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 44044282  XER: 00000000
+  CFAR: c00000000044b160 IRQMASK: 0
+  GPR00: c00000000044b164 c000001f69617ac0 c00800000e96e000 c000001f69617c10
+  GPR04: 05faa2b21e000080 0000000000000000 0000000000000005 ffffffffffffffff
+  GPR08: 0000000000000000 0000000000000001 0000000000000000 0000000000000001
+  GPR12: c00800000e949ec8 c000001ffffd3400 0000000000000000 0000000000000000
+  GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
+  GPR20: 0000000000000000 0000000000000000 c000001f5c065160 c000000001c76f90
+  GPR24: c000001f06f20000 c000001f5c065100 0000000000000008 c000001f0eb98c78
+  GPR28: c000001dcab40000 c000001dcab403d8 c000001f69617c10 0000000000000011
+  NIP [c00800000e949fac] xive_native_esb_fault+0xe4/0x240 [kvm]
+  LR [c00000000044b164] __do_fault+0x64/0x220
+  Call Trace:
+  [c000001f69617ac0] [0000000137a5dc20] 0x137a5dc20 (unreliable)
+  [c000001f69617b50] [c00000000044b164] __do_fault+0x64/0x220
+  [c000001f69617b90] [c000000000453838] do_fault+0x218/0x930
+  [c000001f69617bf0] [c000000000456f50] __handle_mm_fault+0x350/0xdf0
+  [c000001f69617cd0] [c000000000457b1c] handle_mm_fault+0x12c/0x310
+  [c000001f69617d10] [c00000000007ef44] __do_page_fault+0x264/0xbb0
+  [c000001f69617df0] [c00000000007f8c8] do_page_fault+0x38/0xd0
+  [c000001f69617e30] [c00000000000a714] handle_page_fault+0x18/0x38
+  Instruction dump:
+  40c2fff0 7c2004ac 2fa90000 409e0118 73e90001 41820080 e8bd0008 7c2004ac
+  7ca90074 39400000 915c0000 7929d182 <0b090000> 2fa50000 419e0080 e89e0018
+  ---[ end trace 66c6ff034c53f64f ]---
+  xive-kvm: xive_native_esb_fault: accessing invalid ESB page for source 8 !
+
+Fix that by checking the validity of the KVM XIVE interrupt structure.
+
+Fixes: 6520ca64cde7 ("KVM: PPC: Book3S HV: XIVE: Add a mapping for the source ESB pages")
+Cc: stable@vger.kernel.org # v5.2+
+Reported-by: Greg Kurz <groug@kaod.org>
+Signed-off-by: Cédric Le Goater <clg@kaod.org>
+Tested-by: Greg Kurz <groug@kaod.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20201105134713.656160-1-clg@kaod.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/kvm/book3s_xive_native.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/powerpc/kvm/book3s_xive_native.c
++++ b/arch/powerpc/kvm/book3s_xive_native.c
+@@ -252,6 +252,13 @@ static vm_fault_t xive_native_esb_fault(
+       }
+       state = &sb->irq_state[src];
++
++      /* Some sanity checking */
++      if (!state->valid) {
++              pr_devel("%s: source %lx invalid !\n", __func__, irq);
++              return VM_FAULT_SIGBUS;
++      }
++
+       kvmppc_xive_select_irq(state, &hw_num, &xd);
+       arch_spin_lock(&sb->lock);
diff --git a/queue-5.4/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch b/queue-5.4/kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
new file mode 100644 (file)
index 0000000..84537c2
--- /dev/null
@@ -0,0 +1,139 @@
+From 71cc849b7093bb83af966c0e60cb11b7f35cd746 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 27 Nov 2020 09:18:20 +0100
+Subject: KVM: x86: Fix split-irqchip vs interrupt injection window request
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 71cc849b7093bb83af966c0e60cb11b7f35cd746 upstream.
+
+kvm_cpu_accept_dm_intr and kvm_vcpu_ready_for_interrupt_injection are
+a hodge-podge of conditions, hacked together to get something that
+more or less works.  But what is actually needed is much simpler;
+in both cases the fundamental question is, do we have a place to stash
+an interrupt if userspace does KVM_INTERRUPT?
+
+In userspace irqchip mode, that is !vcpu->arch.interrupt.injected.
+Currently kvm_event_needs_reinjection(vcpu) covers it, but it is
+unnecessarily restrictive.
+
+In split irqchip mode it's a bit more complicated, we need to check
+kvm_apic_accept_pic_intr(vcpu) (the IRQ window exit is basically an INTACK
+cycle and thus requires ExtINTs not to be masked) as well as
+!pending_userspace_extint(vcpu).  However, there is no need to
+check kvm_event_needs_reinjection(vcpu), since split irqchip keeps
+pending ExtINT state separate from event injection state, and checking
+kvm_cpu_has_interrupt(vcpu) is wrong too since ExtINT has higher
+priority than APIC interrupts.  In fact the latter fixes a bug:
+when userspace requests an IRQ window vmexit, an interrupt in the
+local APIC can cause kvm_cpu_has_interrupt() to be true and thus
+kvm_vcpu_ready_for_interrupt_injection() to return false.  When this
+happens, vcpu_run does not exit to userspace but the interrupt window
+vmexits keep occurring.  The VM loops without any hope of making progress.
+
+Once we try to fix these with something like
+
+     return kvm_arch_interrupt_allowed(vcpu) &&
+-        !kvm_cpu_has_interrupt(vcpu) &&
+-        !kvm_event_needs_reinjection(vcpu) &&
+-        kvm_cpu_accept_dm_intr(vcpu);
++        (!lapic_in_kernel(vcpu)
++         ? !vcpu->arch.interrupt.injected
++         : (kvm_apic_accept_pic_intr(vcpu)
++            && !pending_userspace_extint(v)));
+
+we realize two things.  First, thanks to the previous patch the complex
+conditional can reuse !kvm_cpu_has_extint(vcpu).  Second, the interrupt
+window request in vcpu_enter_guest()
+
+        bool req_int_win =
+                dm_request_for_irq_injection(vcpu) &&
+                kvm_cpu_accept_dm_intr(vcpu);
+
+should be kept in sync with kvm_vcpu_ready_for_interrupt_injection():
+it is unnecessary to ask the processor for an interrupt window
+if we would not be able to return to userspace.  Therefore,
+kvm_cpu_accept_dm_intr(vcpu) is basically !kvm_cpu_has_extint(vcpu)
+ANDed with the existing check for masked ExtINT.  It all makes sense:
+
+- we can accept an interrupt from userspace if there is a place
+  to stash it (and, for irqchip split, ExtINTs are not masked).
+  Interrupts from userspace _can_ be accepted even if right now
+  EFLAGS.IF=0.
+
+- in order to tell userspace we will inject its interrupt ("IRQ
+  window open" i.e. kvm_vcpu_ready_for_interrupt_injection), both
+  KVM and the vCPU need to be ready to accept the interrupt.
+
+... and this is what the patch implements.
+
+Reported-by: David Woodhouse <dwmw@amazon.co.uk>
+Analyzed-by: David Woodhouse <dwmw@amazon.co.uk>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: Nikos Tsironis <ntsironis@arrikto.com>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Tested-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/kvm_host.h |    1 +
+ arch/x86/kvm/irq.c              |    2 +-
+ arch/x86/kvm/x86.c              |   18 ++++++++++--------
+ 3 files changed, 12 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1560,6 +1560,7 @@ int kvm_test_age_hva(struct kvm *kvm, un
+ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
+ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
++int kvm_cpu_has_extint(struct kvm_vcpu *v);
+ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
+--- a/arch/x86/kvm/irq.c
++++ b/arch/x86/kvm/irq.c
+@@ -40,7 +40,7 @@ static int pending_userspace_extint(stru
+  * check if there is pending interrupt from
+  * non-APIC source without intack.
+  */
+-static int kvm_cpu_has_extint(struct kvm_vcpu *v)
++int kvm_cpu_has_extint(struct kvm_vcpu *v)
+ {
+       /*
+        * FIXME: interrupt.injected represents an interrupt whose
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3624,21 +3624,23 @@ static int kvm_vcpu_ioctl_set_lapic(stru
+ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
+ {
++      /*
++       * We can accept userspace's request for interrupt injection
++       * as long as we have a place to store the interrupt number.
++       * The actual injection will happen when the CPU is able to
++       * deliver the interrupt.
++       */
++      if (kvm_cpu_has_extint(vcpu))
++              return false;
++
++      /* Acknowledging ExtINT does not happen if LINT0 is masked.  */
+       return (!lapic_in_kernel(vcpu) ||
+               kvm_apic_accept_pic_intr(vcpu));
+ }
+-/*
+- * if userspace requested an interrupt window, check that the
+- * interrupt window is open.
+- *
+- * No need to exit to userspace if we already have an interrupt queued.
+- */
+ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
+ {
+       return kvm_arch_interrupt_allowed(vcpu) &&
+-              !kvm_cpu_has_interrupt(vcpu) &&
+-              !kvm_event_needs_reinjection(vcpu) &&
+               kvm_cpu_accept_dm_intr(vcpu);
+ }
diff --git a/queue-5.4/kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch b/queue-5.4/kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch
new file mode 100644 (file)
index 0000000..8701894
--- /dev/null
@@ -0,0 +1,163 @@
+From 72c3bcdcda494cbd600712a32e67702cdee60c07 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 27 Nov 2020 08:53:52 +0100
+Subject: KVM: x86: handle !lapic_in_kernel case in kvm_cpu_*_extint
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 72c3bcdcda494cbd600712a32e67702cdee60c07 upstream.
+
+Centralize handling of interrupts from the userspace APIC
+in kvm_cpu_has_extint and kvm_cpu_get_extint, since
+userspace APIC interrupts are handled more or less the
+same as ExtINTs are with split irqchip.  This removes
+duplicated code from kvm_cpu_has_injectable_intr and
+kvm_cpu_has_interrupt, and makes the code more similar
+between kvm_cpu_has_{extint,interrupt} on one side
+and kvm_cpu_get_{extint,interrupt} on the other.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Filippo Sironi <sironi@amazon.de>
+Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
+Tested-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/irq.c   |   83 ++++++++++++++++++++-------------------------------
+ arch/x86/kvm/lapic.c |    2 -
+ 2 files changed, 34 insertions(+), 51 deletions(-)
+
+--- a/arch/x86/kvm/irq.c
++++ b/arch/x86/kvm/irq.c
+@@ -42,27 +42,8 @@ static int pending_userspace_extint(stru
+  */
+ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
+ {
+-      u8 accept = kvm_apic_accept_pic_intr(v);
+-
+-      if (accept) {
+-              if (irqchip_split(v->kvm))
+-                      return pending_userspace_extint(v);
+-              else
+-                      return v->kvm->arch.vpic->output;
+-      } else
+-              return 0;
+-}
+-
+-/*
+- * check if there is injectable interrupt:
+- * when virtual interrupt delivery enabled,
+- * interrupt from apic will handled by hardware,
+- * we don't need to check it here.
+- */
+-int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
+-{
+       /*
+-       * FIXME: interrupt.injected represents an interrupt that it's
++       * FIXME: interrupt.injected represents an interrupt whose
+        * side-effects have already been applied (e.g. bit from IRR
+        * already moved to ISR). Therefore, it is incorrect to rely
+        * on interrupt.injected to know if there is a pending
+@@ -75,6 +56,23 @@ int kvm_cpu_has_injectable_intr(struct k
+       if (!lapic_in_kernel(v))
+               return v->arch.interrupt.injected;
++      if (!kvm_apic_accept_pic_intr(v))
++              return 0;
++
++      if (irqchip_split(v->kvm))
++              return pending_userspace_extint(v);
++      else
++              return v->kvm->arch.vpic->output;
++}
++
++/*
++ * check if there is injectable interrupt:
++ * when virtual interrupt delivery enabled,
++ * interrupt from apic will handled by hardware,
++ * we don't need to check it here.
++ */
++int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
++{
+       if (kvm_cpu_has_extint(v))
+               return 1;
+@@ -90,20 +88,6 @@ int kvm_cpu_has_injectable_intr(struct k
+  */
+ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
+ {
+-      /*
+-       * FIXME: interrupt.injected represents an interrupt that it's
+-       * side-effects have already been applied (e.g. bit from IRR
+-       * already moved to ISR). Therefore, it is incorrect to rely
+-       * on interrupt.injected to know if there is a pending
+-       * interrupt in the user-mode LAPIC.
+-       * This leads to nVMX/nSVM not be able to distinguish
+-       * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
+-       * pending interrupt or should re-inject an injected
+-       * interrupt.
+-       */
+-      if (!lapic_in_kernel(v))
+-              return v->arch.interrupt.injected;
+-
+       if (kvm_cpu_has_extint(v))
+               return 1;
+@@ -117,16 +101,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt)
+  */
+ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
+ {
+-      if (kvm_cpu_has_extint(v)) {
+-              if (irqchip_split(v->kvm)) {
+-                      int vector = v->arch.pending_external_vector;
+-
+-                      v->arch.pending_external_vector = -1;
+-                      return vector;
+-              } else
+-                      return kvm_pic_read_irq(v->kvm); /* PIC */
+-      } else
++      if (!kvm_cpu_has_extint(v)) {
++              WARN_ON(!lapic_in_kernel(v));
+               return -1;
++      }
++
++      if (!lapic_in_kernel(v))
++              return v->arch.interrupt.nr;
++
++      if (irqchip_split(v->kvm)) {
++              int vector = v->arch.pending_external_vector;
++
++              v->arch.pending_external_vector = -1;
++              return vector;
++      } else
++              return kvm_pic_read_irq(v->kvm); /* PIC */
+ }
+ /*
+@@ -134,13 +123,7 @@ static int kvm_cpu_get_extint(struct kvm
+  */
+ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
+ {
+-      int vector;
+-
+-      if (!lapic_in_kernel(v))
+-              return v->arch.interrupt.nr;
+-
+-      vector = kvm_cpu_get_extint(v);
+-
++      int vector = kvm_cpu_get_extint(v);
+       if (vector != -1)
+               return vector;                  /* PIC */
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2330,7 +2330,7 @@ int kvm_apic_has_interrupt(struct kvm_vc
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       u32 ppr;
+-      if (!kvm_apic_hw_enabled(apic))
++      if (!kvm_apic_present(vcpu))
+               return -1;
+       __apic_update_ppr(apic, &ppr);
index 3fa01ec1b0b53cd2937f79ec0b4b1e662b3d7aa3..701d7526471b417123507dad8756ad77bf37aace 100644 (file)
@@ -11,3 +11,10 @@ smb3-call-cifs-reconnect-from-demultiplex-thread.patch
 smb3-avoid-mid-pending-list-corruption.patch
 smb3-handle-error-case-during-offload-read-path.patch
 cifs-fix-a-memleak-with-modefromsid.patch
+kvm-ppc-book3s-hv-xive-fix-possible-oops-when-accessing-esb-page.patch
+kvm-arm64-vgic-v3-drop-the-reporting-of-gicr_typer.last-for-userspace.patch
+kvm-x86-handle-lapic_in_kernel-case-in-kvm_cpu_-_extint.patch
+kvm-x86-fix-split-irqchip-vs-interrupt-injection-window-request.patch
+trace-fix-potenial-dangerous-pointer.patch
+arm64-pgtable-fix-pte_accessible.patch
+arm64-pgtable-ensure-dirty-bit-is-preserved-across-pte_wrprotect.patch
diff --git a/queue-5.4/trace-fix-potenial-dangerous-pointer.patch b/queue-5.4/trace-fix-potenial-dangerous-pointer.patch
new file mode 100644 (file)
index 0000000..3423f8b
--- /dev/null
@@ -0,0 +1,65 @@
+From fdeb17c70c9ecae655378761accf5a26a55a33cf Mon Sep 17 00:00:00 2001
+From: Hui Su <sh_def@163.com>
+Date: Wed, 25 Nov 2020 00:52:05 +0800
+Subject: trace: fix potenial dangerous pointer
+
+From: Hui Su <sh_def@163.com>
+
+commit fdeb17c70c9ecae655378761accf5a26a55a33cf upstream.
+
+The bdi_dev_name() returns a char [64], and
+the __entry->name is a char [32].
+
+It may be dangerous to TP_printk("%s", __entry->name)
+after the strncpy().
+
+CC: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20201124165205.GA23937@rlk
+Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Hui Su <sh_def@163.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/trace/events/writeback.h |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/include/trace/events/writeback.h
++++ b/include/trace/events/writeback.h
+@@ -192,7 +192,7 @@ TRACE_EVENT(inode_foreign_history,
+       ),
+       TP_fast_assign(
+-              strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
++              strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
+               __entry->ino            = inode->i_ino;
+               __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
+               __entry->history        = history;
+@@ -221,7 +221,7 @@ TRACE_EVENT(inode_switch_wbs,
+       ),
+       TP_fast_assign(
+-              strncpy(__entry->name,  bdi_dev_name(old_wb->bdi), 32);
++              strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32);
+               __entry->ino            = inode->i_ino;
+               __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
+               __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
+@@ -254,7 +254,7 @@ TRACE_EVENT(track_foreign_dirty,
+               struct address_space *mapping = page_mapping(page);
+               struct inode *inode = mapping ? mapping->host : NULL;
+-              strncpy(__entry->name,  bdi_dev_name(wb->bdi), 32);
++              strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
+               __entry->bdi_id         = wb->bdi->id;
+               __entry->ino            = inode ? inode->i_ino : 0;
+               __entry->memcg_id       = wb->memcg_css->id;
+@@ -287,7 +287,7 @@ TRACE_EVENT(flush_foreign,
+       ),
+       TP_fast_assign(
+-              strncpy(__entry->name,  bdi_dev_name(wb->bdi), 32);
++              strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
+               __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
+               __entry->frn_bdi_id     = frn_bdi_id;
+               __entry->frn_memcg_id   = frn_memcg_id;