From 7f0dcb0c33e448cfe2d4d5bbffea4318aa2a9f57 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Mon, 9 May 2022 20:56:59 -0400
Subject: [PATCH] Fixes for 5.15

Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 ...lease-return-enodev-if-fbdev-was-unr.patch |  53 +++
 .../gpio-mvebu-drop-pwm-base-assignment.patch |  48 ++
 ...issing-module-owner-to-ops-structure.patch |  36 ++
 ...-timer-posted-interrupt-only-when-mw.patch |  54 +++
 ...-not-use-bitfields-larger-than-32-bi.patch | 433 ++++++++++++++++++
 ...lence-compiler-warning-in-the-kvm_pa.patch |  48 ++
 ...ly-provide-cpuid-leaf-0xa-if-host-ha.patch |  54 +++
 ...change-icr-on-write-to-apic_self_ipi.patch |  43 ++
 ...d-null-pointer-dereference-on-page-f.patch |  39 ++
 ...b-out-of-bounds-while-reading-resour.patch | 151 ++++++
 ...don-t-skip-fib-events-on-current-dst.patch | 114 +++++
 ...-lag-fix-fib_info-pointer-assignment.patch |  40 ++
 ...x-use-after-free-in-fib-event-handle.patch | 247 ++++++++++
 ...sleep-in-atomic-bug-when-firmware-do.patch |  70 +++
 ...t-vm-verify-mmap-addr-in-mremap_test.patch |  73 +++
 ...fy-remap-destination-address-in-mrem.patch |  66 +++
 queue-5.15/series                             |  17 +
 ...-bsp-msr_kvm_poll_control-across-sus.patch |  71 +++
 18 files changed, 1657 insertions(+)
 create mode 100644 queue-5.15/fbdev-make-fb_release-return-enodev-if-fbdev-was-unr.patch
 create mode 100644 queue-5.15/gpio-mvebu-drop-pwm-base-assignment.patch
 create mode 100644 queue-5.15/iommu-dart-add-missing-module-owner-to-ops-structure.patch
 create mode 100644 queue-5.15/kvm-lapic-enable-timer-posted-interrupt-only-when-mw.patch
 create mode 100644 queue-5.15/kvm-selftests-do-not-use-bitfields-larger-than-32-bi.patch
 create mode 100644 queue-5.15/kvm-selftests-silence-compiler-warning-in-the-kvm_pa.patch
 create mode 100644 queue-5.15/kvm-x86-cpuid-only-provide-cpuid-leaf-0xa-if-host-ha.patch
 create mode 100644 queue-5.15/kvm-x86-do-not-change-icr-on-write-to-apic_self_ipi.patch
 create mode 100644 queue-5.15/kvm-x86-mmu-avoid-null-pointer-dereference-on-page-f.patch
 create mode 100644 queue-5.15/net-mlx5-fix-slab-out-of-bounds-while-reading-resour.patch
 create mode 100644 queue-5.15/net-mlx5e-lag-don-t-skip-fib-events-on-current-dst.patch
 create mode 100644 queue-5.15/net-mlx5e-lag-fix-fib_info-pointer-assignment.patch
 create mode 100644 queue-5.15/net-mlx5e-lag-fix-use-after-free-in-fib-event-handle.patch
 create mode 100644 queue-5.15/nfc-netlink-fix-sleep-in-atomic-bug-when-firmware-do.patch
 create mode 100644 queue-5.15/selftest-vm-verify-mmap-addr-in-mremap_test.patch
 create mode 100644 queue-5.15/selftest-vm-verify-remap-destination-address-in-mrem.patch
 create mode 100644 queue-5.15/x86-kvm-preserve-bsp-msr_kvm_poll_control-across-sus.patch

diff --git a/queue-5.15/fbdev-make-fb_release-return-enodev-if-fbdev-was-unr.patch b/queue-5.15/fbdev-make-fb_release-return-enodev-if-fbdev-was-unr.patch
new file mode 100644
index 00000000000..11b99358356
--- /dev/null
+++ b/queue-5.15/fbdev-make-fb_release-return-enodev-if-fbdev-was-unr.patch
@@ -0,0 +1,53 @@
+From 386bb0a5603b5a0cf950d70cbeb5bbcff764b502 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 15:50:14 +0200
+Subject: fbdev: Make fb_release() return -ENODEV if fbdev was unregistered
+
+From: Javier Martinez Canillas <javierm@redhat.com>
+
+[ Upstream commit aafa025c76dcc7d1a8c8f0bdefcbe4eb480b2f6a ]
+
+A reference to the framebuffer device struct fb_info is stored in the file
+private data, but this reference could no longer be valid and must not be
+accessed directly. Instead, the file_fb_info() accessor function must be
+used since it does sanity checking to make sure that the fb_info is valid.
+
+This can happen for example if the registered framebuffer device is for a
+driver that just uses a framebuffer provided by the system firmware. In
+that case, the fbdev core would unregister the framebuffer device when a
+real video driver is probed and ask to remove conflicting framebuffers.
+
+The bug has been present for a long time but commit 27599aacbaef ("fbdev:
+Hot-unplug firmware fb devices on forced removal") unmasked it since the
+fbdev core started unregistering the framebuffers' devices associated.
+
+Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal")
+Reported-by: Maxime Ripard <maxime@cerno.tech>
+Reported-by: Junxiao Chang <junxiao.chang@intel.com>
+Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220502135014.377945-1-javierm@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/core/fbmem.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
+index 0371ad233fdf..8e38a7a5cf2f 100644
+--- a/drivers/video/fbdev/core/fbmem.c
++++ b/drivers/video/fbdev/core/fbmem.c
+@@ -1436,7 +1436,10 @@ fb_release(struct inode *inode, struct file *file)
+ __acquires(&info->lock)
+ __releases(&info->lock)
+ {
+-	struct fb_info * const info = file->private_data;
++	struct fb_info * const info = file_fb_info(file);
++
++	if (!info)
++		return -ENODEV;
+ 
+ 	lock_fb_info(info);
+ 	if (info->fbops->fb_release)
+-- 
+2.35.1
+
diff --git a/queue-5.15/gpio-mvebu-drop-pwm-base-assignment.patch b/queue-5.15/gpio-mvebu-drop-pwm-base-assignment.patch
new file mode 100644
index 00000000000..1f75726bbaf
--- /dev/null
+++ b/queue-5.15/gpio-mvebu-drop-pwm-base-assignment.patch
@@ -0,0 +1,48 @@
+From 6559461a5b252da76d76fe4dae48d7a2d2a08842 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Apr 2022 09:23:40 +0300
+Subject: gpio: mvebu: drop pwm base assignment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Baruch Siach <baruch@tkos.co.il>
+
+[ Upstream commit e5f6e5d554ac274f9c8ba60078103d0425b93c19 ]
+
+pwmchip_add() unconditionally assigns the base ID dynamically. Commit
+f9a8ee8c8bcd1 ("pwm: Always allocate PWM chip base ID dynamically")
+dropped all base assignment from drivers under drivers/pwm/. It missed
+this driver. Fix that.
+
+Fixes: f9a8ee8c8bcd1 ("pwm: Always allocate PWM chip base ID dynamically")
+Signed-off-by: Baruch Siach <baruch@tkos.co.il>
+Reviewed-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Acked-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-mvebu.c | 7 -------
+ 1 file changed, 7 deletions(-)
+
+diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
+index 8f429d9f3661..ad8822da7c27 100644
+--- a/drivers/gpio/gpio-mvebu.c
++++ b/drivers/gpio/gpio-mvebu.c
+@@ -871,13 +871,6 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
+ 	mvpwm->chip.dev = dev;
+ 	mvpwm->chip.ops = &mvebu_pwm_ops;
+ 	mvpwm->chip.npwm = mvchip->chip.ngpio;
+-	/*
+-	 * There may already be some PWM allocated, so we can't force
+-	 * mvpwm->chip.base to a fixed point like mvchip->chip.base.
+-	 * So, we let pwmchip_add() do the numbering and take the next free
+-	 * region.
+-	 */
+-	mvpwm->chip.base = -1;
+ 
+ 	spin_lock_init(&mvpwm->lock);
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.15/iommu-dart-add-missing-module-owner-to-ops-structure.patch b/queue-5.15/iommu-dart-add-missing-module-owner-to-ops-structure.patch
new file mode 100644
index 00000000000..5abae8513c3
--- /dev/null
+++ b/queue-5.15/iommu-dart-add-missing-module-owner-to-ops-structure.patch
@@ -0,0 +1,36 @@
+From 3871d84c07782172308cd3b4056a42517bdc8d50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 18:22:38 +0900
+Subject: iommu/dart: Add missing module owner to ops structure
+
+From: Hector Martin <marcan@marcan.st>
+
+[ Upstream commit 2ac2fab52917ae82cbca97cf6e5d2993530257ed ]
+
+This is required to make loading this as a module work.
+
+Signed-off-by: Hector Martin <marcan@marcan.st>
+Fixes: 46d1fb072e76 ("iommu/dart: Add DART iommu driver")
+Reviewed-by: Sven Peter <sven@svenpeter.dev>
+Link: https://lore.kernel.org/r/20220502092238.30486-1-marcan@marcan.st
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/apple-dart.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
+index 9c9bbccc00bd..baba4571c815 100644
+--- a/drivers/iommu/apple-dart.c
++++ b/drivers/iommu/apple-dart.c
+@@ -757,6 +757,7 @@ static const struct iommu_ops apple_dart_iommu_ops = {
+ 	.of_xlate = apple_dart_of_xlate,
+ 	.def_domain_type = apple_dart_def_domain_type,
+ 	.pgsize_bitmap = -1UL, /* Restricted during dart probe */
++	.owner = THIS_MODULE,
+ };
+ 
+ static irqreturn_t apple_dart_irq(int irq, void *dev)
+-- 
+2.35.1
+
diff --git a/queue-5.15/kvm-lapic-enable-timer-posted-interrupt-only-when-mw.patch b/queue-5.15/kvm-lapic-enable-timer-posted-interrupt-only-when-mw.patch
new file mode 100644
index 00000000000..76150707117
--- /dev/null
+++ b/queue-5.15/kvm-lapic-enable-timer-posted-interrupt-only-when-mw.patch
@@ -0,0 +1,54 @@
+From 015b9b8612ebb385a08f2436f658a31289ab357e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jan 2022 04:08:58 -0800
+Subject: KVM: LAPIC: Enable timer posted-interrupt only when mwait/hlt is
+ advertised
+
+From: Wanpeng Li <wanpengli@tencent.com>
+
+[ Upstream commit 1714a4eb6fb0cb79f182873cd011a8ed60ac65e8 ]
+
+As commit 0c5f81dad46 ("KVM: LAPIC: Inject timer interrupt via posted
+interrupt") mentioned that the host admin should well tune the guest
+setup, so that vCPUs are placed on isolated pCPUs, and with several pCPUs
+surplus for *busy* housekeeping.  In this setup, it is preferable to
+disable mwait/hlt/pause vmexits to keep the vCPUs in non-root mode.
+
+However, if only some guests are isolated and others not, they would not
+have any benefit from posted timer interrupts, and at the same time lose
+VMX preemption timer fast paths because kvm_can_post_timer_interrupt()
+returns true and therefore forces kvm_can_use_hv_timer() to false.
+
+By guaranteeing that posted-interrupt timer is only used if MWAIT or
+HLT are done without vmexit, KVM can make a better choice and use the
+VMX preemption timer and the corresponding fast paths.
+
+Reported-by: Aili Yao <yaoaili@kingsoft.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Cc: Aili Yao <yaoaili@kingsoft.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
+Message-Id: <1643112538-36743-1-git-send-email-wanpengli@tencent.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/lapic.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
+index 83d1743a1dd0..493d636e6231 100644
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -113,7 +113,8 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
+ 
+ static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+ {
+-	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
++	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
++		(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
+ }
+ 
+ bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
+-- 
+2.35.1
+
diff --git a/queue-5.15/kvm-selftests-do-not-use-bitfields-larger-than-32-bi.patch b/queue-5.15/kvm-selftests-do-not-use-bitfields-larger-than-32-bi.patch
new file mode 100644
index 00000000000..54a2708d250
--- /dev/null
+++ b/queue-5.15/kvm-selftests-do-not-use-bitfields-larger-than-32-bi.patch
@@ -0,0 +1,433 @@
+From af02e8ffadf897472c41890285242a18d6e88632 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Apr 2022 06:27:27 -0400
+Subject: kvm: selftests: do not use bitfields larger than 32-bits for PTEs
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit f18b4aebe107d092e384b1ae680b1e1de7a0196d ]
+
+Red Hat's QE team reported test failure on access_tracking_perf_test:
+
+Testing guest mode: PA-bits:ANY, VA-bits:48,  4K pages
+guest physical test memory offset: 0x3fffbffff000
+
+Populating memory             : 0.684014577s
+Writing to populated memory   : 0.006230175s
+Reading from populated memory : 0.004557805s
+==== Test Assertion Failure ====
+  lib/kvm_util.c:1411: false
+  pid=125806 tid=125809 errno=4 - Interrupted system call
+     1  0x0000000000402f7c: addr_gpa2hva at kvm_util.c:1411
+     2   (inlined by) addr_gpa2hva at kvm_util.c:1405
+     3  0x0000000000401f52: lookup_pfn at access_tracking_perf_test.c:98
+     4   (inlined by) mark_vcpu_memory_idle at access_tracking_perf_test.c:152
+     5   (inlined by) vcpu_thread_main at access_tracking_perf_test.c:232
+     6  0x00007fefe9ff81ce: ?? ??:0
+     7  0x00007fefe9c64d82: ?? ??:0
+  No vm physical memory at 0xffbffff000
+
+I can easily reproduce it with an Intel(R) Xeon(R) CPU E5-2630 with 46 bits
+PA.
+
+It turns out that the address translation for clearing idle page tracking
+returned a wrong result; addr_gva2gpa()'s last step, which is based on
+"pte[index[0]].pfn", did the calculation with 40 bits length and the
+high 12 bits got truncated.  In above case the GPA address to be returned
+should be 0x3fffbffff000 for GVA 0xc0000000, but it got truncated into
+0xffbffff000 and the subsequent gpa2hva lookup failed.
+
+The width of operations on bit fields greater than 32-bit is
+implementation defined, and differs between GCC (which uses the bitfield
+precision) and clang (which uses 64-bit arithmetic), so this is a
+potential minefield.  Remove the bit fields and use manual masking
+instead.
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075036
+Reported-by: Nana Liu <nanliu@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Tested-by: Peter Xu <peterx@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../selftests/kvm/include/x86_64/processor.h  |  15 ++
+ .../selftests/kvm/lib/x86_64/processor.c      | 192 +++++++-----------
+ 2 files changed, 92 insertions(+), 115 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
+index 05e65ca1c30c..23861c8faa61 100644
+--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
++++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
+@@ -58,6 +58,21 @@
+ /* CPUID.0x8000_0001.EDX */
+ #define CPUID_GBPAGES		(1ul << 26)
+ 
++/* Page table bitfield declarations */
++#define PTE_PRESENT_MASK        BIT_ULL(0)
++#define PTE_WRITABLE_MASK       BIT_ULL(1)
++#define PTE_USER_MASK           BIT_ULL(2)
++#define PTE_ACCESSED_MASK       BIT_ULL(5)
++#define PTE_DIRTY_MASK          BIT_ULL(6)
++#define PTE_LARGE_MASK          BIT_ULL(7)
++#define PTE_GLOBAL_MASK         BIT_ULL(8)
++#define PTE_NX_MASK             BIT_ULL(63)
++
++#define PAGE_SHIFT		12
++
++#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
++#define PTE_GET_PFN(pte)        (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
++
+ /* General Registers in 64-Bit Mode */
+ struct gpr64_regs {
+ 	u64 rax;
+diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
+index da73b97e1e6d..46057079d8bb 100644
+--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
++++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
+@@ -19,38 +19,6 @@
+ 
+ vm_vaddr_t exception_handlers;
+ 
+-/* Virtual translation table structure declarations */
+-struct pageUpperEntry {
+-	uint64_t present:1;
+-	uint64_t writable:1;
+-	uint64_t user:1;
+-	uint64_t write_through:1;
+-	uint64_t cache_disable:1;
+-	uint64_t accessed:1;
+-	uint64_t ignored_06:1;
+-	uint64_t page_size:1;
+-	uint64_t ignored_11_08:4;
+-	uint64_t pfn:40;
+-	uint64_t ignored_62_52:11;
+-	uint64_t execute_disable:1;
+-};
+-
+-struct pageTableEntry {
+-	uint64_t present:1;
+-	uint64_t writable:1;
+-	uint64_t user:1;
+-	uint64_t write_through:1;
+-	uint64_t cache_disable:1;
+-	uint64_t accessed:1;
+-	uint64_t dirty:1;
+-	uint64_t reserved_07:1;
+-	uint64_t global:1;
+-	uint64_t ignored_11_09:3;
+-	uint64_t pfn:40;
+-	uint64_t ignored_62_52:11;
+-	uint64_t execute_disable:1;
+-};
+-
+ void regs_dump(FILE *stream, struct kvm_regs *regs,
+ 	       uint8_t indent)
+ {
+@@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
+ 	return &page_table[index];
+ }
+ 
+-static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
+-						    uint64_t pt_pfn,
+-						    uint64_t vaddr,
+-						    uint64_t paddr,
+-						    int level,
+-						    enum x86_page_size page_size)
++static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
++				       uint64_t pt_pfn,
++				       uint64_t vaddr,
++				       uint64_t paddr,
++				       int level,
++				       enum x86_page_size page_size)
+ {
+-	struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+-
+-	if (!pte->present) {
+-		pte->writable = true;
+-		pte->present = true;
+-		pte->page_size = (level == page_size);
+-		if (pte->page_size)
+-			pte->pfn = paddr >> vm->page_shift;
++	uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
++
++	if (!(*pte & PTE_PRESENT_MASK)) {
++		*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
++		if (level == page_size)
++			*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ 		else
+-			pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
++			*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
+ 	} else {
+ 		/*
+ 		 * Entry already present.  Assert that the caller doesn't want
+@@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
+ 		TEST_ASSERT(level != page_size,
+ 			    "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
+ 			    page_size, vaddr);
+-		TEST_ASSERT(!pte->page_size,
++		TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+ 			    "Cannot create page table at level: %u, vaddr: 0x%lx\n",
+ 			    level, vaddr);
+ 	}
+@@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ 		   enum x86_page_size page_size)
+ {
+ 	const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+-	struct pageUpperEntry *pml4e, *pdpe, *pde;
+-	struct pageTableEntry *pte;
++	uint64_t *pml4e, *pdpe, *pde;
++	uint64_t *pte;
+ 
+ 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+ 		    "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+@@ -257,24 +223,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ 	 */
+ 	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
+ 				      vaddr, paddr, 3, page_size);
+-	if (pml4e->page_size)
++	if (*pml4e & PTE_LARGE_MASK)
+ 		return;
+ 
+-	pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
+-	if (pdpe->page_size)
++	pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
++	if (*pdpe & PTE_LARGE_MASK)
+ 		return;
+ 
+-	pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
+-	if (pde->page_size)
++	pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
++	if (*pde & PTE_LARGE_MASK)
+ 		return;
+ 
+ 	/* Fill in page table entry. */
+-	pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
+-	TEST_ASSERT(!pte->present,
++	pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
++	TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+ 		    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
+-	pte->pfn = paddr >> vm->page_shift;
+-	pte->writable = true;
+-	pte->present = 1;
++	*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ }
+ 
+ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+@@ -282,12 +246,12 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+ 	__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+ }
+ 
+-static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
++static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+ 						       uint64_t vaddr)
+ {
+ 	uint16_t index[4];
+-	struct pageUpperEntry *pml4e, *pdpe, *pde;
+-	struct pageTableEntry *pte;
++	uint64_t *pml4e, *pdpe, *pde;
++	uint64_t *pte;
+ 	struct kvm_cpuid_entry2 *entry;
+ 	struct kvm_sregs sregs;
+ 	int max_phy_addr;
+@@ -329,30 +293,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
+ 	index[3] = (vaddr >> 39) & 0x1ffu;
+ 
+ 	pml4e = addr_gpa2hva(vm, vm->pgd);
+-	TEST_ASSERT(pml4e[index[3]].present,
++	TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
+ 		"Expected pml4e to be present for gva: 0x%08lx", vaddr);
+-	TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
+-		(rsvd_mask | (1ull << 7))) == 0,
++	TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
+ 		"Unexpected reserved bits set.");
+ 
+-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+-	TEST_ASSERT(pdpe[index[2]].present,
++	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
++	TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
+ 		"Expected pdpe to be present for gva: 0x%08lx", vaddr);
+-	TEST_ASSERT(pdpe[index[2]].page_size == 0,
++	TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
+ 		"Expected pdpe to map a pde not a 1-GByte page.");
+-	TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
++	TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
+ 		"Unexpected reserved bits set.");
+ 
+-	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+-	TEST_ASSERT(pde[index[1]].present,
++	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
++	TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
+ 		"Expected pde to be present for gva: 0x%08lx", vaddr);
+-	TEST_ASSERT(pde[index[1]].page_size == 0,
++	TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
+ 		"Expected pde to map a pte not a 2-MByte page.");
+-	TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
++	TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
+ 		"Unexpected reserved bits set.");
+ 
+-	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+-	TEST_ASSERT(pte[index[0]].present,
++	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
++	TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
+ 		"Expected pte to be present for gva: 0x%08lx", vaddr);
+ 
+ 	return &pte[index[0]];
+@@ -360,7 +323,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
+ 
+ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
+ {
+-	struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
++	uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+ 
+ 	return *(uint64_t *)pte;
+ }
+@@ -368,18 +331,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
+ void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+ 			     uint64_t pte)
+ {
+-	struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
+-								  vaddr);
++	uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+ 
+ 	*(uint64_t *)new_pte = pte;
+ }
+ 
+ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+ {
+-	struct pageUpperEntry *pml4e, *pml4e_start;
+-	struct pageUpperEntry *pdpe, *pdpe_start;
+-	struct pageUpperEntry *pde, *pde_start;
+-	struct pageTableEntry *pte, *pte_start;
++	uint64_t *pml4e, *pml4e_start;
++	uint64_t *pdpe, *pdpe_start;
++	uint64_t *pde, *pde_start;
++	uint64_t *pte, *pte_start;
+ 
+ 	if (!vm->pgd_created)
+ 		return;
+@@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+ 	fprintf(stream, "%*s      index hvaddr         gpaddr         "
+ 		"addr         w exec dirty\n",
+ 		indent, "");
+-	pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
++	pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
+ 	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+ 		pml4e = &pml4e_start[n1];
+-		if (!pml4e->present)
++		if (!(*pml4e & PTE_PRESENT_MASK))
+ 			continue;
+-		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
++		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
+ 			" %u\n",
+ 			indent, "",
+ 			pml4e - pml4e_start, pml4e,
+-			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
+-			pml4e->writable, pml4e->execute_disable);
++			addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
++			!!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
+ 
+-		pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
++		pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
+ 		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+ 			pdpe = &pdpe_start[n2];
+-			if (!pdpe->present)
++			if (!(*pdpe & PTE_PRESENT_MASK))
+ 				continue;
+-			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
++			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
+ 				"%u  %u\n",
+ 				indent, "",
+ 				pdpe - pdpe_start, pdpe,
+ 				addr_hva2gpa(vm, pdpe),
+-				(uint64_t) pdpe->pfn, pdpe->writable,
+-				pdpe->execute_disable);
++				PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
++				!!(*pdpe & PTE_NX_MASK));
+ 
+-			pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
++			pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
+ 			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+ 				pde = &pde_start[n3];
+-				if (!pde->present)
++				if (!(*pde & PTE_PRESENT_MASK))
+ 					continue;
+ 				fprintf(stream, "%*spde   0x%-3zx %p "
+-					"0x%-12lx 0x%-10lx %u  %u\n",
++					"0x%-12lx 0x%-10llx %u  %u\n",
+ 					indent, "", pde - pde_start, pde,
+ 					addr_hva2gpa(vm, pde),
+-					(uint64_t) pde->pfn, pde->writable,
+-					pde->execute_disable);
++					PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
++					!!(*pde & PTE_NX_MASK));
+ 
+-				pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
++				pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
+ 				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+ 					pte = &pte_start[n4];
+-					if (!pte->present)
++					if (!(*pte & PTE_PRESENT_MASK))
+ 						continue;
+ 					fprintf(stream, "%*spte   0x%-3zx %p "
+-						"0x%-12lx 0x%-10lx %u  %u "
++						"0x%-12lx 0x%-10llx %u  %u "
+ 						"    %u    0x%-10lx\n",
+ 						indent, "",
+ 						pte - pte_start, pte,
+ 						addr_hva2gpa(vm, pte),
+-						(uint64_t) pte->pfn,
+-						pte->writable,
+-						pte->execute_disable,
+-						pte->dirty,
++						PTE_GET_PFN(*pte),
++						!!(*pte & PTE_WRITABLE_MASK),
++						!!(*pte & PTE_NX_MASK),
++						!!(*pte & PTE_DIRTY_MASK),
+ 						((uint64_t) n1 << 27)
+ 							| ((uint64_t) n2 << 18)
+ 							| ((uint64_t) n3 << 9)
+@@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
+ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+ {
+ 	uint16_t index[4];
+-	struct pageUpperEntry *pml4e, *pdpe, *pde;
+-	struct pageTableEntry *pte;
++	uint64_t *pml4e, *pdpe, *pde;
++	uint64_t *pte;
+ 
+ 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+ 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+@@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+ 	if (!vm->pgd_created)
+ 		goto unmapped_gva;
+ 	pml4e = addr_gpa2hva(vm, vm->pgd);
+-	if (!pml4e[index[3]].present)
++	if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
+ 		goto unmapped_gva;
+ 
+-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+-	if (!pdpe[index[2]].present)
++	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
++	if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
+ 		goto unmapped_gva;
+ 
+-	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+-	if (!pde[index[1]].present)
++	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
++	if (!(pde[index[1]] & PTE_PRESENT_MASK))
+ 		goto unmapped_gva;
+ 
+-	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+-	if (!pte[index[0]].present)
++	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
++	if (!(pte[index[0]] & PTE_PRESENT_MASK))
+ 		goto unmapped_gva;
+ 
+-	return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
++	return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & 0xfffu);
+ 
+ unmapped_gva:
+ 	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+-- 
+2.35.1
+
diff --git a/queue-5.15/kvm-selftests-silence-compiler-warning-in-the-kvm_pa.patch b/queue-5.15/kvm-selftests-silence-compiler-warning-in-the-kvm_pa.patch
new file mode 100644
index 00000000000..0368dedd7d6
--- /dev/null
+++ b/queue-5.15/kvm-selftests-silence-compiler-warning-in-the-kvm_pa.patch
@@ -0,0 +1,48 @@
+From 1c4fe2e5ba211b99c22d5f594a27a4e3af5af7ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Apr 2022 12:30:31 +0200
+Subject: KVM: selftests: Silence compiler warning in the kvm_page_table_test
+
+From: Thomas Huth <thuth@redhat.com>
+
+[ Upstream commit 266a19a0bc4fbfab4d981a47640ca98972a01865 ]
+
+When compiling kvm_page_table_test.c, I get this compiler warning
+with gcc 11.2:
+
+kvm_page_table_test.c: In function 'pre_init_before_test':
+../../../../tools/include/linux/kernel.h:44:24: warning: comparison of
+ distinct pointer types lacks a cast
+   44 |         (void) (&_max1 == &_max2);              \
+      |                        ^~
+kvm_page_table_test.c:281:21: note: in expansion of macro 'max'
+  281 |         alignment = max(0x100000, alignment);
+      |                     ^~~
+
+Fix it by adjusting the type of the absolute value.
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Message-Id: <20220414103031.565037-1-thuth@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/kvm/kvm_page_table_test.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
+index 36407cb0ec85..f1ddfe4c4a03 100644
+--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
++++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
+@@ -278,7 +278,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
+ 	else
+ 		guest_test_phys_mem = p->phys_offset;
+ #ifdef __s390x__
+-	alignment = max(0x100000, alignment);
++	alignment = max(0x100000UL, alignment);
+ #endif
+ 	guest_test_phys_mem &= ~(alignment - 1);
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.15/kvm-x86-cpuid-only-provide-cpuid-leaf-0xa-if-host-ha.patch b/queue-5.15/kvm-x86-cpuid-only-provide-cpuid-leaf-0xa-if-host-ha.patch
new file mode 100644
index 00000000000..7d81b7f998c
--- /dev/null
+++ b/queue-5.15/kvm-x86-cpuid-only-provide-cpuid-leaf-0xa-if-host-ha.patch
@@ -0,0 +1,54 @@
+From d1bf2b8d8bd864a1db8cc3677e64f07594122d8f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Apr 2022 17:01:49 +0530
+Subject: kvm: x86/cpuid: Only provide CPUID leaf 0xA if host has architectural
+ PMU
+
+From: Sandipan Das <sandipan.das@amd.com>
+
+[ Upstream commit 5a1bde46f98b893cda6122b00e94c0c40a6ead3c ]
+
+On some x86 processors, CPUID leaf 0xA provides information
+on Architectural Performance Monitoring features. It
+advertises a PMU version which Qemu uses to determine the
+availability of additional MSRs to manage the PMCs.
+
+Upon receiving a KVM_GET_SUPPORTED_CPUID ioctl request for
+the same, the kernel constructs return values based on the
+x86_pmu_capability irrespective of the vendor.
+
+This leaf and the additional MSRs are not supported on AMD
+and Hygon processors. If AMD PerfMonV2 is detected, the PMU
+version is set to 2 and guest startup breaks because of an
+attempt to access a non-existent MSR. Return zeros to avoid
+this.
+
+Fixes: a6c06ed1a60a ("KVM: Expose the architectural performance monitoring CPUID leaf")
+Reported-by: Vasant Hegde <vasant.hegde@amd.com>
+Signed-off-by: Sandipan Das <sandipan.das@amd.com>
+Message-Id: <3fef83d9c2b2f7516e8ff50d60851f29a4bcb716.1651058600.git.sandipan.das@amd.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/cpuid.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
+index 5f1d4a5aa871..b17c9b00669e 100644
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -725,6 +725,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
+ 		union cpuid10_eax eax;
+ 		union cpuid10_edx edx;
+ 
++		if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
++			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
++			break;
++		}
++
+ 		perf_get_x86_pmu_capability(&cap);
+ 
+ 		/*
+-- 
+2.35.1
+
diff --git a/queue-5.15/kvm-x86-do-not-change-icr-on-write-to-apic_self_ipi.patch b/queue-5.15/kvm-x86-do-not-change-icr-on-write-to-apic_self_ipi.patch
new file mode 100644
index 00000000000..d57a983de9e
--- /dev/null
+++ b/queue-5.15/kvm-x86-do-not-change-icr-on-write-to-apic_self_ipi.patch
@@ -0,0 +1,43 @@
+From d7f8ff94b3ea03fd24acbfff4a12e1dd6fde3b03 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Feb 2022 09:53:36 -0500
+Subject: KVM: x86: Do not change ICR on write to APIC_SELF_IPI
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit d22a81b304a27fca6124174a8e842e826c193466 ]
+
+Emulating writes to SELF_IPI with a write to ICR has an unwanted side effect:
+the value of ICR in vAPIC page gets changed.  The SDM lists SELF_IPI as write-only,
+with no associated MMIO offset, so any write should have no visible side
+effect in the vAPIC page.
+
+Reported-by: Chao Gao <chao.gao@intel.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/lapic.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
+index 4d92fb4fdf69..83d1743a1dd0 100644
+--- a/arch/x86/kvm/lapic.c
++++ b/arch/x86/kvm/lapic.c
+@@ -2125,10 +2125,9 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
+ 		break;
+ 
+ 	case APIC_SELF_IPI:
+-		if (apic_x2apic_mode(apic)) {
+-			kvm_lapic_reg_write(apic, APIC_ICR,
+-					    APIC_DEST_SELF | (val & APIC_VECTOR_MASK));
+-		} else
++		if (apic_x2apic_mode(apic))
++			kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0);
++		else
+ 			ret = 1;
+ 		break;
+ 	default:
+-- 
+2.35.1
+
diff --git a/queue-5.15/kvm-x86-mmu-avoid-null-pointer-dereference-on-page-f.patch b/queue-5.15/kvm-x86-mmu-avoid-null-pointer-dereference-on-page-f.patch
new file mode 100644
index 00000000000..c5d041d2196
--- /dev/null
+++ b/queue-5.15/kvm-x86-mmu-avoid-null-pointer-dereference-on-page-f.patch
@@ -0,0 +1,39 @@
+From f022b17acc9add41057f83b1314aa279448a072e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Feb 2022 19:08:33 -0500
+Subject: KVM: x86/mmu: avoid NULL-pointer dereference on page freeing bugs
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit 9191b8f0745e63edf519e4a54a4aaae1d3d46fbd ]
+
+WARN and bail if KVM attempts to free a root that isn't backed by a shadow
+page.  KVM allocates a bare page for "special" roots, e.g. when using PAE
+paging or shadowing 2/3/4-level page tables with 4/5-level, and so root_hpa
+will be valid but won't be backed by a shadow page.  It's all too easy to
+blindly call mmu_free_root_page() on root_hpa, be nice and WARN instead of
+crashing KVM and possibly the kernel.
+
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 34e828badc51..806f9d42bcce 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -3314,6 +3314,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
+ 		return;
+ 
+ 	sp = to_shadow_page(*root_hpa & PT64_BASE_ADDR_MASK);
++	if (WARN_ON(!sp))
++		return;
+ 
+ 	if (is_tdp_mmu_page(sp))
+ 		kvm_tdp_mmu_put_root(kvm, sp, false);
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-mlx5-fix-slab-out-of-bounds-while-reading-resour.patch b/queue-5.15/net-mlx5-fix-slab-out-of-bounds-while-reading-resour.patch
new file mode 100644
index 00000000000..844482178c7
--- /dev/null
+++ b/queue-5.15/net-mlx5-fix-slab-out-of-bounds-while-reading-resour.patch
@@ -0,0 +1,151 @@
+From 3bd84b07d25dd16503d38a932562bdc7d0840fa9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Mar 2022 19:02:03 +0200
+Subject: net/mlx5: Fix slab-out-of-bounds while reading resource dump menu
+
+From: Aya Levin <ayal@nvidia.com>
+
+[ Upstream commit 7ba2d9d8de96696c1451fee1b01da11f45bdc2b9 ]
+
+Resource dump menu may span over more than a single page, support it.
+Otherwise, menu read may result in a memory access violation: reading
+outside of the allocated page.
+Note that page format of the first menu page contains menu headers while
+the subsequent menu pages contain only records.
+
+The KASAN logs are as follows:
+BUG: KASAN: slab-out-of-bounds in strcmp+0x9b/0xb0
+Read of size 1 at addr ffff88812b2e1fd0 by task systemd-udevd/496
+
+CPU: 5 PID: 496 Comm: systemd-udevd Tainted: G    B  5.16.0_for_upstream_debug_2022_01_10_23_12 #1
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x57/0x7d
+ print_address_description.constprop.0+0x1f/0x140
+ ? strcmp+0x9b/0xb0
+ ? strcmp+0x9b/0xb0
+ kasan_report.cold+0x83/0xdf
+ ? strcmp+0x9b/0xb0
+ strcmp+0x9b/0xb0
+ mlx5_rsc_dump_init+0x4ab/0x780 [mlx5_core]
+ ? mlx5_rsc_dump_destroy+0x80/0x80 [mlx5_core]
+ ? lockdep_hardirqs_on_prepare+0x286/0x400
+ ? raw_spin_unlock_irqrestore+0x47/0x50
+ ? atomic_notifier_chain_register+0x32/0x40
+ mlx5_load+0x104/0x2e0 [mlx5_core]
+ mlx5_init_one+0x41b/0x610 [mlx5_core]
+ ....
+The buggy address belongs to the object at ffff88812b2e0000
+ which belongs to the cache kmalloc-4k of size 4096
+The buggy address is located 4048 bytes to the right of
+ 4096-byte region [ffff88812b2e0000, ffff88812b2e1000)
+The buggy address belongs to the page:
+page:000000009d69807a refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88812b2e6000 pfn:0x12b2e0
+head:000000009d69807a order:3 compound_mapcount:0 compound_pincount:0
+flags: 0x8000000000010200(slab|head|zone=2)
+raw: 8000000000010200 0000000000000000 dead000000000001 ffff888100043040
+raw: ffff88812b2e6000 0000000080040000 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff88812b2e1e80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff88812b2e1f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+>ffff88812b2e1f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+                                                 ^
+ ffff88812b2e2000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff88812b2e2080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+==================================================================
+
+Fixes: 12206b17235a ("net/mlx5: Add support for resource dump")
+Signed-off-by: Aya Levin <ayal@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/diag/rsc_dump.c        | 31 +++++++++++++++----
+ 1 file changed, 25 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
+index ed4fb79b4db7..75b6060f7a9a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
+@@ -31,6 +31,7 @@ static const char *const mlx5_rsc_sgmt_name[] = {
+ struct mlx5_rsc_dump {
+ 	u32 pdn;
+ 	struct mlx5_core_mkey mkey;
++	u32 number_of_menu_items;
+ 	u16 fw_segment_type[MLX5_SGMT_TYPE_NUM];
+ };
+ 
+@@ -50,21 +51,37 @@ static int mlx5_rsc_dump_sgmt_get_by_name(char *name)
+ 	return -EINVAL;
+ }
+ 
+-static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page)
++#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \
++					MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \
++					MLX5_ST_SZ_BYTES(resource_dump_menu_segment))
++
++static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page,
++					int read_size, int start_idx)
+ {
+ 	void *data = page_address(page);
+ 	enum mlx5_sgmt_type sgmt_idx;
+ 	int num_of_items;
+ 	char *sgmt_name;
+ 	void *member;
++	int size = 0;
+ 	void *menu;
+ 	int i;
+ 
+-	menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
+-	num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records);
++	if (!start_idx) {
++		menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
++		rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu,
++							  num_of_records);
++		size = MLX5_RSC_DUMP_MENU_HEADER_SIZE;
++		data += size;
++	}
++	num_of_items = rsc_dump->number_of_menu_items;
++
++	for (i = 0; start_idx + i < num_of_items; i++) {
++		size += MLX5_ST_SZ_BYTES(resource_dump_menu_record);
++		if (size >= read_size)
++			return start_idx + i;
+ 
+-	for (i = 0; i < num_of_items; i++) {
+-		member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]);
++		member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i;
+ 		sgmt_name =  MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name);
+ 		sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name);
+ 		if (sgmt_idx == -EINVAL)
+@@ -72,6 +89,7 @@ static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct
+ 		rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record,
+ 							       member, segment_type);
+ 	}
++	return 0;
+ }
+ 
+ static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
+@@ -168,6 +186,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
+ 	struct mlx5_rsc_dump_cmd *cmd = NULL;
+ 	struct mlx5_rsc_key key = {};
+ 	struct page *page;
++	int start_idx = 0;
+ 	int size;
+ 	int err;
+ 
+@@ -189,7 +208,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
+ 		if (err < 0)
+ 			goto destroy_cmd;
+ 
+-		mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page);
++		start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx);
+ 
+ 	} while (err > 0);
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-mlx5e-lag-don-t-skip-fib-events-on-current-dst.patch b/queue-5.15/net-mlx5e-lag-don-t-skip-fib-events-on-current-dst.patch
new file mode 100644
index 00000000000..922109b88b7
--- /dev/null
+++ b/queue-5.15/net-mlx5e-lag-don-t-skip-fib-events-on-current-dst.patch
@@ -0,0 +1,114 @@
+From 713bcccad531624658a1145da54f277378f13b80 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Apr 2022 17:40:37 +0300
+Subject: net/mlx5e: Lag, Don't skip fib events on current dst
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit 4a2a664ed87962c4ddb806a84b5c9634820bcf55 ]
+
+Referenced change added check to skip updating fib when new fib instance
+has same or lower priority. However, new fib instance can be an update on
+same dst address as existing one even though the structure is another
+instance that has different address. Ignoring events on such instances
+causes multipath LAG state to not be correctly updated.
+
+Track 'dst' and 'dst_len' fields of fib event fib_entry_notifier_info
+structure and don't skip events that have the same values of those fields.
+
+Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/lag_mp.c  | 20 +++++++++++--------
+ .../net/ethernet/mellanox/mlx5/core/lag_mp.h  |  2 ++
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+index 9d50b9c2db5e..81786a9a424c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+@@ -100,10 +100,12 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
+ 	flush_workqueue(mp->wq);
+ }
+ 
+-static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi)
++static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
+ {
+ 	mp->fib.mfi = fi;
+ 	mp->fib.priority = fi->fib_priority;
++	mp->fib.dst = dst;
++	mp->fib.dst_len = dst_len;
+ }
+ 
+ struct mlx5_fib_event_work {
+@@ -116,10 +118,10 @@ struct mlx5_fib_event_work {
+ 	};
+ };
+ 
+-static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+-				     unsigned long event,
+-				     struct fib_info *fi)
++static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
++				     struct fib_entry_notifier_info *fen_info)
+ {
++	struct fib_info *fi = fen_info->fi;
+ 	struct lag_mp *mp = &ldev->lag_mp;
+ 	struct fib_nh *fib_nh0, *fib_nh1;
+ 	unsigned int nhs;
+@@ -133,7 +135,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ 	}
+ 
+ 	/* Handle multipath entry with lower priority value */
+-	if (mp->fib.mfi && mp->fib.mfi != fi && fi->fib_priority >= mp->fib.priority)
++	if (mp->fib.mfi && mp->fib.mfi != fi &&
++	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
++	    fi->fib_priority >= mp->fib.priority)
+ 		return;
+ 
+ 	/* Handle add/replace event */
+@@ -149,7 +153,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ 
+ 			i++;
+ 			mlx5_lag_set_port_affinity(ldev, i);
+-			mlx5_lag_fib_set(mp, fi);
++			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
+ 		}
+ 
+ 		return;
+@@ -179,7 +183,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ 	}
+ 
+ 	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
+-	mlx5_lag_fib_set(mp, fi);
++	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
+ }
+ 
+ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+@@ -220,7 +224,7 @@ static void mlx5_lag_fib_update(struct work_struct *work)
+ 	case FIB_EVENT_ENTRY_REPLACE:
+ 	case FIB_EVENT_ENTRY_DEL:
+ 		mlx5_lag_fib_route_event(ldev, fib_work->event,
+-					 fib_work->fen_info.fi);
++					 &fib_work->fen_info);
+ 		fib_info_put(fib_work->fen_info.fi);
+ 		break;
+ 	case FIB_EVENT_NH_ADD:
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+index e8380eb0dd6a..b3a7f18b9e30 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+@@ -18,6 +18,8 @@ struct lag_mp {
+ 	struct {
+ 		const void        *mfi; /* used in tracking fib events */
+ 		u32               priority;
++		u32               dst;
++		int               dst_len;
+ 	} fib;
+ 	struct workqueue_struct   *wq;
+ };
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-mlx5e-lag-fix-fib_info-pointer-assignment.patch b/queue-5.15/net-mlx5e-lag-fix-fib_info-pointer-assignment.patch
new file mode 100644
index 00000000000..8470372e7e9
--- /dev/null
+++ b/queue-5.15/net-mlx5e-lag-fix-fib_info-pointer-assignment.patch
@@ -0,0 +1,40 @@
+From 25544ec0a67cdd42e9d7ee795e8e2bd7aff82c3a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Apr 2022 17:32:54 +0300
+Subject: net/mlx5e: Lag, Fix fib_info pointer assignment
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit a6589155ec9847918e00e7279b8aa6d4c272bea7 ]
+
+Referenced change incorrectly sets single path fib_info even when LAG is
+not active. Fix it by moving call to mlx5_lag_fib_set() into conditional
+that verifies LAG state.
+
+Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+index 8d278c45e7cc..9d50b9c2db5e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+@@ -149,9 +149,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ 
+ 			i++;
+ 			mlx5_lag_set_port_affinity(ldev, i);
++			mlx5_lag_fib_set(mp, fi);
+ 		}
+ 
+-		mlx5_lag_fib_set(mp, fi);
+ 		return;
+ 	}
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-mlx5e-lag-fix-use-after-free-in-fib-event-handle.patch b/queue-5.15/net-mlx5e-lag-fix-use-after-free-in-fib-event-handle.patch
new file mode 100644
index 00000000000..0ea53523808
--- /dev/null
+++ b/queue-5.15/net-mlx5e-lag-fix-use-after-free-in-fib-event-handle.patch
@@ -0,0 +1,247 @@
+From 402301a852f26350a1417bd70828a51eb9716dba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Apr 2022 17:32:19 +0300
+Subject: net/mlx5e: Lag, Fix use-after-free in fib event handler
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit 27b0420fd959e38e3500e60b637d39dfab065645 ]
+
+Recent commit that modified fib route event handler to handle events
+according to their priority introduced use-after-free[0] in mp->mfi pointer
+usage. The pointer now is not just cached in order to be compared to
+following fib_info instances, but is also dereferenced to obtain
+fib_priority. However, since mlx5 lag code doesn't hold the reference to
+fib_info during whole mp->mfi lifetime, it could be used after fib_info
+instance has already been freed by kernel infrastructure code.
+
+Don't ever dereference mp->mfi pointer. Refactor it to be 'const void*'
+type and cache fib_info priority in dedicated integer. Group
+fib_info-related data into dedicated 'fib' structure that will be further
+extended by following patches in the series.
+
+[0]:
+
+[  203.588029] ==================================================================
+[  203.590161] BUG: KASAN: use-after-free in mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core]
+[  203.592386] Read of size 4 at addr ffff888144df2050 by task kworker/u20:4/138
+
+[  203.594766] CPU: 3 PID: 138 Comm: kworker/u20:4 Tainted: G    B             5.17.0-rc7+ #6
+[  203.596751] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[  203.598813] Workqueue: mlx5_lag_mp mlx5_lag_fib_update [mlx5_core]
+[  203.600053] Call Trace:
+[  203.600608]  <TASK>
+[  203.601110]  dump_stack_lvl+0x48/0x5e
+[  203.601860]  print_address_description.constprop.0+0x1f/0x160
+[  203.602950]  ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core]
+[  203.604073]  ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core]
+[  203.605177]  kasan_report.cold+0x83/0xdf
+[  203.605969]  ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core]
+[  203.607102]  mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core]
+[  203.608199]  ? mlx5_lag_init_fib_work+0x1c0/0x1c0 [mlx5_core]
+[  203.609382]  ? read_word_at_a_time+0xe/0x20
+[  203.610463]  ? strscpy+0xa0/0x2a0
+[  203.611463]  process_one_work+0x722/0x1270
+[  203.612344]  worker_thread+0x540/0x11e0
+[  203.613136]  ? rescuer_thread+0xd50/0xd50
+[  203.613949]  kthread+0x26e/0x300
+[  203.614627]  ? kthread_complete_and_exit+0x20/0x20
+[  203.615542]  ret_from_fork+0x1f/0x30
+[  203.616273]  </TASK>
+
+[  203.617174] Allocated by task 3746:
+[  203.617874]  kasan_save_stack+0x1e/0x40
+[  203.618644]  __kasan_kmalloc+0x81/0xa0
+[  203.619394]  fib_create_info+0xb41/0x3c50
+[  203.620213]  fib_table_insert+0x190/0x1ff0
+[  203.621020]  fib_magic.isra.0+0x246/0x2e0
+[  203.621803]  fib_add_ifaddr+0x19f/0x670
+[  203.622563]  fib_inetaddr_event+0x13f/0x270
+[  203.623377]  blocking_notifier_call_chain+0xd4/0x130
+[  203.624355]  __inet_insert_ifa+0x641/0xb20
+[  203.625185]  inet_rtm_newaddr+0xc3d/0x16a0
+[  203.626009]  rtnetlink_rcv_msg+0x309/0x880
+[  203.626826]  netlink_rcv_skb+0x11d/0x340
+[  203.627626]  netlink_unicast+0x4cc/0x790
+[  203.628430]  netlink_sendmsg+0x762/0xc00
+[  203.629230]  sock_sendmsg+0xb2/0xe0
+[  203.629955]  ____sys_sendmsg+0x58a/0x770
+[  203.630756]  ___sys_sendmsg+0xd8/0x160
+[  203.631523]  __sys_sendmsg+0xb7/0x140
+[  203.632294]  do_syscall_64+0x35/0x80
+[  203.633045]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+[  203.634427] Freed by task 0:
+[  203.635063]  kasan_save_stack+0x1e/0x40
+[  203.635844]  kasan_set_track+0x21/0x30
+[  203.636618]  kasan_set_free_info+0x20/0x30
+[  203.637450]  __kasan_slab_free+0xfc/0x140
+[  203.638271]  kfree+0x94/0x3b0
+[  203.638903]  rcu_core+0x5e4/0x1990
+[  203.639640]  __do_softirq+0x1ba/0x5d3
+
+[  203.640828] Last potentially related work creation:
+[  203.641785]  kasan_save_stack+0x1e/0x40
+[  203.642571]  __kasan_record_aux_stack+0x9f/0xb0
+[  203.643478]  call_rcu+0x88/0x9c0
+[  203.644178]  fib_release_info+0x539/0x750
+[  203.644997]  fib_table_delete+0x659/0xb80
+[  203.645809]  fib_magic.isra.0+0x1a3/0x2e0
+[  203.646617]  fib_del_ifaddr+0x93f/0x1300
+[  203.647415]  fib_inetaddr_event+0x9f/0x270
+[  203.648251]  blocking_notifier_call_chain+0xd4/0x130
+[  203.649225]  __inet_del_ifa+0x474/0xc10
+[  203.650016]  devinet_ioctl+0x781/0x17f0
+[  203.650788]  inet_ioctl+0x1ad/0x290
+[  203.651533]  sock_do_ioctl+0xce/0x1c0
+[  203.652315]  sock_ioctl+0x27b/0x4f0
+[  203.653058]  __x64_sys_ioctl+0x124/0x190
+[  203.653850]  do_syscall_64+0x35/0x80
+[  203.654608]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+[  203.666952] The buggy address belongs to the object at ffff888144df2000
+                which belongs to the cache kmalloc-256 of size 256
+[  203.669250] The buggy address is located 80 bytes inside of
+                256-byte region [ffff888144df2000, ffff888144df2100)
+[  203.671332] The buggy address belongs to the page:
+[  203.672273] page:00000000bf6c9314 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x144df0
+[  203.674009] head:00000000bf6c9314 order:2 compound_mapcount:0 compound_pincount:0
+[  203.675422] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff)
+[  203.676819] raw: 002ffff800010200 0000000000000000 dead000000000122 ffff888100042b40
+[  203.678384] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
+[  203.679928] page dumped because: kasan: bad access detected
+
+[  203.681455] Memory state around the buggy address:
+[  203.682421]  ffff888144df1f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  203.683863]  ffff888144df1f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  203.685310] >ffff888144df2000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  203.686701]                                                  ^
+[  203.687820]  ffff888144df2080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  203.689226]  ffff888144df2100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  203.690620] ==================================================================
+
+Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/lag_mp.c  | 26 ++++++++++++-------
+ .../net/ethernet/mellanox/mlx5/core/lag_mp.h  |  5 +++-
+ 2 files changed, 20 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+index cb0a48d374a3..8d278c45e7cc 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+@@ -100,6 +100,12 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
+ 	flush_workqueue(mp->wq);
+ }
+ 
++static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi)
++{
++	mp->fib.mfi = fi;
++	mp->fib.priority = fi->fib_priority;
++}
++
+ struct mlx5_fib_event_work {
+ 	struct work_struct work;
+ 	struct mlx5_lag *ldev;
+@@ -121,13 +127,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ 	/* Handle delete event */
+ 	if (event == FIB_EVENT_ENTRY_DEL) {
+ 		/* stop track */
+-		if (mp->mfi == fi)
+-			mp->mfi = NULL;
++		if (mp->fib.mfi == fi)
++			mp->fib.mfi = NULL;
+ 		return;
+ 	}
+ 
+ 	/* Handle multipath entry with lower priority value */
+-	if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority)
++	if (mp->fib.mfi && mp->fib.mfi != fi && fi->fib_priority >= mp->fib.priority)
+ 		return;
+ 
+ 	/* Handle add/replace event */
+@@ -145,7 +151,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ 			mlx5_lag_set_port_affinity(ldev, i);
+ 		}
+ 
+-		mp->mfi = fi;
++		mlx5_lag_fib_set(mp, fi);
+ 		return;
+ 	}
+ 
+@@ -165,7 +171,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ 	}
+ 
+ 	/* First time we see multipath route */
+-	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
++	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
+ 		struct lag_tracker tracker;
+ 
+ 		tracker = ldev->tracker;
+@@ -173,7 +179,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+ 	}
+ 
+ 	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
+-	mp->mfi = fi;
++	mlx5_lag_fib_set(mp, fi);
+ }
+ 
+ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+@@ -184,7 +190,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+ 	struct lag_mp *mp = &ldev->lag_mp;
+ 
+ 	/* Check the nh event is related to the route */
+-	if (!mp->mfi || mp->mfi != fi)
++	if (!mp->fib.mfi || mp->fib.mfi != fi)
+ 		return;
+ 
+ 	/* nh added/removed */
+@@ -313,7 +319,7 @@ void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
+ 	/* Clear mfi, as it might become stale when a route delete event
+ 	 * has been missed, see mlx5_lag_fib_route_event().
+ 	 */
+-	ldev->lag_mp.mfi = NULL;
++	ldev->lag_mp.fib.mfi = NULL;
+ }
+ 
+ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
+@@ -324,7 +330,7 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
+ 	/* always clear mfi, as it might become stale when a route delete event
+ 	 * has been missed
+ 	 */
+-	mp->mfi = NULL;
++	mp->fib.mfi = NULL;
+ 
+ 	if (mp->fib_nb.notifier_call)
+ 		return 0;
+@@ -354,5 +360,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
+ 	unregister_fib_notifier(&init_net, &mp->fib_nb);
+ 	destroy_workqueue(mp->wq);
+ 	mp->fib_nb.notifier_call = NULL;
+-	mp->mfi = NULL;
++	mp->fib.mfi = NULL;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+index dea199e79bed..e8380eb0dd6a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+@@ -15,7 +15,10 @@ enum mlx5_lag_port_affinity {
+ 
+ struct lag_mp {
+ 	struct notifier_block     fib_nb;
+-	struct fib_info           *mfi; /* used in tracking fib events */
++	struct {
++		const void        *mfi; /* used in tracking fib events */
++		u32               priority;
++	} fib;
+ 	struct workqueue_struct   *wq;
+ };
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.15/nfc-netlink-fix-sleep-in-atomic-bug-when-firmware-do.patch b/queue-5.15/nfc-netlink-fix-sleep-in-atomic-bug-when-firmware-do.patch
new file mode 100644
index 00000000000..f5dbc1bb2f3
--- /dev/null
+++ b/queue-5.15/nfc-netlink-fix-sleep-in-atomic-bug-when-firmware-do.patch
@@ -0,0 +1,70 @@
+From 27a214b70554e2dea2d36d8b6344d681049d826e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 May 2022 13:58:47 +0800
+Subject: NFC: netlink: fix sleep in atomic bug when firmware download timeout
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit 4071bf121d59944d5cd2238de0642f3d7995a997 ]
+
+There is a sleep-in-atomic bug that could cause kernel panic during
+firmware download process. The root cause is that nlmsg_new with
+GFP_KERNEL parameter is called in fw_dnld_timeout which is a timer
+handler. The call trace is shown below:
+
+BUG: sleeping function called from invalid context at include/linux/sched/mm.h:265
+Call Trace:
+kmem_cache_alloc_node
+__alloc_skb
+nfc_genl_fw_download_done
+call_timer_fn
+__run_timers.part.0
+run_timer_softirq
+__do_softirq
+...
+
+The nlmsg_new with GFP_KERNEL parameter may sleep during memory
+allocation process, and the timer handler is run as the result of
+a "software interrupt" that should not call any other function
+that could sleep.
+
+This patch changes allocation mode of netlink message from GFP_KERNEL
+to GFP_ATOMIC in order to prevent sleep in atomic bug. The GFP_ATOMIC
+flag allows the memory allocation to be performed in atomic context.
+
+Fixes: 9674da8759df ("NFC: Add firmware upload netlink command")
+Fixes: 9ea7187c53f6 ("NFC: netlink: Rename CMD_FW_UPLOAD to CMD_FW_DOWNLOAD")
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Link: https://lore.kernel.org/r/20220504055847.38026-1-duoming@zju.edu.cn
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/nfc/netlink.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
+index a207f0b8137b..60fc85781373 100644
+--- a/net/nfc/netlink.c
++++ b/net/nfc/netlink.c
+@@ -534,7 +534,7 @@ int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx)
+ 	struct sk_buff *msg;
+ 	void *hdr;
+ 
+-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
++	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ 	if (!msg)
+ 		return -ENOMEM;
+ 
+@@ -554,7 +554,7 @@ int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx)
+ 
+ 	genlmsg_end(msg, hdr);
+ 
+-	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
++	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
+ 
+ 	return 0;
+ 
+-- 
+2.35.1
+
diff --git a/queue-5.15/selftest-vm-verify-mmap-addr-in-mremap_test.patch b/queue-5.15/selftest-vm-verify-mmap-addr-in-mremap_test.patch
new file mode 100644
index 00000000000..06462b0f625
--- /dev/null
+++ b/queue-5.15/selftest-vm-verify-mmap-addr-in-mremap_test.patch
@@ -0,0 +1,73 @@
+From 2d5fcd6a350f33e110f5b7da453fd5db9bc0f3ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Apr 2022 16:35:49 -0700
+Subject: selftest/vm: verify mmap addr in mremap_test
+
+From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
+
+[ Upstream commit 9c85a9bae267f6b5e5e374d0d023bbbe9db096d3 ]
+
+Avoid calling mmap with requested addresses that are less than the
+system's mmap_min_addr.  When run as root, mmap returns EACCES when
+trying to map addresses < mmap_min_addr.  This is not one of the error
+codes for the condition to retry the mmap in the test.
+
+Rather than arbitrarily retrying on EACCES, don't attempt an mmap until
+addr > vm.mmap_min_addr.
+
+Add a munmap call after an alignment check as the mappings are retained
+after the retry and can reach the vm.max_map_count sysctl.
+
+Link: https://lkml.kernel.org/r/20220420215721.4868-1-sidhartha.kumar@oracle.com
+Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
+Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/mremap_test.c | 29 ++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
+index e3ce33a9954e..efcbf537b3d5 100644
+--- a/tools/testing/selftests/vm/mremap_test.c
++++ b/tools/testing/selftests/vm/mremap_test.c
+@@ -66,6 +66,35 @@ enum {
+ 	.expect_failure = should_fail				\
+ }
+ 
++/* Returns mmap_min_addr sysctl tunable from procfs */
++static unsigned long long get_mmap_min_addr(void)
++{
++	FILE *fp;
++	int n_matched;
++	static unsigned long long addr;
++
++	if (addr)
++		return addr;
++
++	fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
++	if (fp == NULL) {
++		ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
++			strerror(errno));
++		exit(KSFT_SKIP);
++	}
++
++	n_matched = fscanf(fp, "%llu", &addr);
++	if (n_matched != 1) {
++		ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
++			strerror(errno));
++		fclose(fp);
++		exit(KSFT_SKIP);
++	}
++
++	fclose(fp);
++	return addr;
++}
++
+ /*
+  * Returns false if the requested remap region overlaps with an
+  * existing mapping (e.g text, stack) else returns true.
+-- 
+2.35.1
+
diff --git a/queue-5.15/selftest-vm-verify-remap-destination-address-in-mrem.patch b/queue-5.15/selftest-vm-verify-remap-destination-address-in-mrem.patch
new file mode 100644
index 00000000000..3e004f42d34
--- /dev/null
+++ b/queue-5.15/selftest-vm-verify-remap-destination-address-in-mrem.patch
@@ -0,0 +1,66 @@
+From 6d916cf1190f0a1c637fd714c8b129266c8b05a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Apr 2022 16:35:52 -0700
+Subject: selftest/vm: verify remap destination address in mremap_test
+
+From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
+
+[ Upstream commit 18d609daa546c919fd36b62a7b510c18de4b4af8 ]
+
+Because mremap does not have a MAP_FIXED_NOREPLACE flag, it can destroy
+existing mappings.  This causes a segfault when regions such as text are
+remapped and the permissions are changed.
+
+Verify the requested mremap destination address does not overlap any
+existing mappings by using mmap's MAP_FIXED_NOREPLACE flag.  Keep
+incrementing the destination address until a valid mapping is found or
+fail the current test once the max address is reached.
+
+Link: https://lkml.kernel.org/r/20220420215721.4868-2-sidhartha.kumar@oracle.com
+Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
+Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/mremap_test.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
+index efcbf537b3d5..8f4dbbd60c09 100644
+--- a/tools/testing/selftests/vm/mremap_test.c
++++ b/tools/testing/selftests/vm/mremap_test.c
+@@ -66,6 +66,30 @@ enum {
+ 	.expect_failure = should_fail				\
+ }
+ 
++/*
++ * Returns false if the requested remap region overlaps with an
++ * existing mapping (e.g text, stack) else returns true.
++ */
++static bool is_remap_region_valid(void *addr, unsigned long long size)
++{
++	void *remap_addr = NULL;
++	bool ret = true;
++
++	/* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
++	remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
++					 MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
++					 -1, 0);
++
++	if (remap_addr == MAP_FAILED) {
++		if (errno == EEXIST)
++			ret = false;
++	} else {
++		munmap(remap_addr, size);
++	}
++
++	return ret;
++}
++
+ /* Returns mmap_min_addr sysctl tunable from procfs */
+ static unsigned long long get_mmap_min_addr(void)
+ {
+-- 
+2.35.1
+
diff --git a/queue-5.15/series b/queue-5.15/series
index cb925cbf5c0..6245fd44481 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -83,3 +83,20 @@ drm-amdgpu-unify-bo-evicting-method-in-amdgpu_ttm.patch
 drm-amdgpu-explicitly-check-for-s0ix-when-evicting-resources.patch
 drm-amdgpu-don-t-set-s3-and-s0ix-at-the-same-time.patch
 drm-amdgpu-ensure-hda-function-is-suspended-before-asic-reset.patch
+gpio-mvebu-drop-pwm-base-assignment.patch
+kvm-x86-cpuid-only-provide-cpuid-leaf-0xa-if-host-ha.patch
+fbdev-make-fb_release-return-enodev-if-fbdev-was-unr.patch
+net-mlx5-fix-slab-out-of-bounds-while-reading-resour.patch
+net-mlx5e-lag-fix-use-after-free-in-fib-event-handle.patch
+net-mlx5e-lag-fix-fib_info-pointer-assignment.patch
+net-mlx5e-lag-don-t-skip-fib-events-on-current-dst.patch
+iommu-dart-add-missing-module-owner-to-ops-structure.patch
+nfc-netlink-fix-sleep-in-atomic-bug-when-firmware-do.patch
+kvm-selftests-do-not-use-bitfields-larger-than-32-bi.patch
+kvm-selftests-silence-compiler-warning-in-the-kvm_pa.patch
+x86-kvm-preserve-bsp-msr_kvm_poll_control-across-sus.patch
+kvm-x86-do-not-change-icr-on-write-to-apic_self_ipi.patch
+kvm-x86-mmu-avoid-null-pointer-dereference-on-page-f.patch
+kvm-lapic-enable-timer-posted-interrupt-only-when-mw.patch
+selftest-vm-verify-mmap-addr-in-mremap_test.patch
+selftest-vm-verify-remap-destination-address-in-mrem.patch
diff --git a/queue-5.15/x86-kvm-preserve-bsp-msr_kvm_poll_control-across-sus.patch b/queue-5.15/x86-kvm-preserve-bsp-msr_kvm_poll_control-across-sus.patch
new file mode 100644
index 00000000000..27988e620b6
--- /dev/null
+++ b/queue-5.15/x86-kvm-preserve-bsp-msr_kvm_poll_control-across-sus.patch
@@ -0,0 +1,71 @@
+From 14f11f7d7e6120d6a539c60a1555a7db0a4cf726 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Apr 2022 00:42:32 -0700
+Subject: x86/kvm: Preserve BSP MSR_KVM_POLL_CONTROL across suspend/resume
+
+From: Wanpeng Li <wanpengli@tencent.com>
+
+[ Upstream commit 0361bdfddca20c8855ea3bdbbbc9c999912b10ff ]
+
+MSR_KVM_POLL_CONTROL is cleared on reset, thus reverting guests to
+host-side polling after suspend/resume.  Non-bootstrap CPUs are
+restored correctly by the haltpoll driver because they are hot-unplugged
+during suspend and hot-plugged during resume; however, the BSP
+is not hotpluggable and remains in host-side polling mode after
+the guest resume.  This makes the guest pay for the cost of vmexits
+every time the guest enters idle.
+
+Fix it by recording BSP's haltpoll state and resuming it during guest
+resume.
+
+Cc: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
+Message-Id: <1650267752-46796-1-git-send-email-wanpengli@tencent.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/kvm.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
+index bd7b65081eb0..d36b58e705b6 100644
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -66,6 +66,7 @@ static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __align
+ DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
+ static int has_steal_clock = 0;
+ 
++static int has_guest_poll = 0;
+ /*
+  * No need for any "IO delay" on KVM
+  */
+@@ -650,14 +651,26 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
+ 
+ static int kvm_suspend(void)
+ {
++	u64 val = 0;
++
+ 	kvm_guest_cpu_offline(false);
+ 
++#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
++	if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
++		rdmsrl(MSR_KVM_POLL_CONTROL, val);
++	has_guest_poll = !(val & 1);
++#endif
+ 	return 0;
+ }
+ 
+ static void kvm_resume(void)
+ {
+ 	kvm_cpu_online(raw_smp_processor_id());
++
++#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
++	if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
++		wrmsrl(MSR_KVM_POLL_CONTROL, 0);
++#endif
+ }
+ 
+ static struct syscore_ops kvm_syscore_ops = {
+-- 
+2.35.1
+
-- 
2.47.3