--- /dev/null
+From 336feb502a715909a8136eb6a62a83d7268a353b Mon Sep 17 00:00:00 2001
+From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
+Date: Wed, 27 Apr 2022 17:47:14 -0500
+Subject: drm/i915: Fix -Wstringop-overflow warning in call to intel_read_wm_latency()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Gustavo A. R. Silva <gustavoars@kernel.org>
+
+commit 336feb502a715909a8136eb6a62a83d7268a353b upstream.
+
+Fix the following -Wstringop-overflow warnings when building with GCC-11:
+
+drivers/gpu/drm/i915/intel_pm.c:3106:9: warning: ‘intel_read_wm_latency’ accessing 16 bytes in a region of size 10 [-Wstringop-overflow=]
+ 3106 | intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+drivers/gpu/drm/i915/intel_pm.c:3106:9: note: referencing argument 2 of type ‘u16 *’ {aka ‘short unsigned int *’}
+drivers/gpu/drm/i915/intel_pm.c:2861:13: note: in a call to function ‘intel_read_wm_latency’
+ 2861 | static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
+ | ^~~~~~~~~~~~~~~~~~~~~
+
+by removing the over-specified array size from the argument declarations.
+
+It seems that this code is actually safe because the size of the
+array depends on the hardware generation, and the function checks
+for that.
+
+Notice that wm can be an array of 5 elements:
+drivers/gpu/drm/i915/intel_pm.c:3109: intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
+
+or an array of 8 elements:
+drivers/gpu/drm/i915/intel_pm.c:3131: intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
+
+and the compiler legitimately complains about that.
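+
+A reduced stand-alone sketch of the construct GCC complains about (an
+illustration of the pattern, not the actual i915 code; exact diagnostics
+depend on GCC version and flags):
+
+	typedef unsigned short u16;
+
+	/* The [8] bound tells GCC the function may access 8 elements. */
+	static void read_wm_latency(u16 wm[8], int num_levels)
+	{
+		int level;
+
+		for (level = 0; level < num_levels; level++)
+			wm[level] = 0;
+	}
+
+	void setup_watermarks(void)
+	{
+		u16 pri_latency[5];	/* only 10 bytes */
+
+		/* GCC-11 flags this call: 16 bytes expected, 10 available. */
+		read_wm_latency(pri_latency, 5);
+	}
+
+Dropping the bound (u16 wm[]) keeps the prototype honest for both the
+5-element and the 8-element callers.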
+
+This helps with the ongoing efforts to globally enable
+-Wstringop-overflow.
+
+Link: https://github.com/KSPP/linux/issues/181
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/intel_pm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/intel_pm.c
++++ b/drivers/gpu/drm/i915/intel_pm.c
+@@ -2859,7 +2859,7 @@ static void ilk_compute_wm_level(const s
+ }
+
+ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
+- u16 wm[8])
++ u16 wm[])
+ {
+ struct intel_uncore *uncore = &dev_priv->uncore;
+
--- /dev/null
+From 64ba4b15e5c045f8b746c6da5fc9be9a6b00b61d Mon Sep 17 00:00:00 2001
+From: Tadeusz Struk <tadeusz.struk@linaro.org>
+Date: Tue, 17 May 2022 08:13:08 +0900
+Subject: exfat: check if cluster num is valid
+
+From: Tadeusz Struk <tadeusz.struk@linaro.org>
+
+commit 64ba4b15e5c045f8b746c6da5fc9be9a6b00b61d upstream.
+
+Syzbot reported slab-out-of-bounds read in exfat_clear_bitmap.
+This was triggered by a reproducer calling truncate with size 0,
+which causes the following trace:
+
+BUG: KASAN: slab-out-of-bounds in exfat_clear_bitmap+0x147/0x490 fs/exfat/balloc.c:174
+Read of size 8 at addr ffff888115aa9508 by task syz-executor251/365
+
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack_lvl+0x1e2/0x24b lib/dump_stack.c:118
+ print_address_description+0x81/0x3c0 mm/kasan/report.c:233
+ __kasan_report mm/kasan/report.c:419 [inline]
+ kasan_report+0x1a4/0x1f0 mm/kasan/report.c:436
+ __asan_report_load8_noabort+0x14/0x20 mm/kasan/report_generic.c:309
+ exfat_clear_bitmap+0x147/0x490 fs/exfat/balloc.c:174
+ exfat_free_cluster+0x25a/0x4a0 fs/exfat/fatent.c:181
+ __exfat_truncate+0x99e/0xe00 fs/exfat/file.c:217
+ exfat_truncate+0x11b/0x4f0 fs/exfat/file.c:243
+ exfat_setattr+0xa03/0xd40 fs/exfat/file.c:339
+ notify_change+0xb76/0xe10 fs/attr.c:336
+ do_truncate+0x1ea/0x2d0 fs/open.c:65
+
+Move the is_valid_cluster() helper from fatent.c to a common header so
+it can be reused from other *.c files, and use it in exfat_set_bitmap()
+and exfat_clear_bitmap() to validate that the cluster number is within
+the valid range.
+
+Link: https://syzkaller.appspot.com/bug?id=50381fc73821ecae743b8cf24b4c9a04776f767c
+Reported-by: syzbot+a4087e40b9c13aad7892@syzkaller.appspotmail.com
+Fixes: 1e49a94cf707 ("exfat: add bitmap operations")
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exfat/balloc.c | 8 ++++++--
+ fs/exfat/exfat_fs.h | 6 ++++++
+ fs/exfat/fatent.c | 6 ------
+ 3 files changed, 12 insertions(+), 8 deletions(-)
+
+--- a/fs/exfat/balloc.c
++++ b/fs/exfat/balloc.c
+@@ -148,7 +148,9 @@ int exfat_set_bitmap(struct inode *inode
+ struct super_block *sb = inode->i_sb;
+ struct exfat_sb_info *sbi = EXFAT_SB(sb);
+
+- WARN_ON(clu < EXFAT_FIRST_CLUSTER);
++ if (!is_valid_cluster(sbi, clu))
++ return -EINVAL;
++
+ ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
+ i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
+ b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx);
+@@ -166,7 +168,9 @@ void exfat_clear_bitmap(struct inode *in
+ struct exfat_sb_info *sbi = EXFAT_SB(sb);
+ struct exfat_mount_options *opts = &sbi->options;
+
+- WARN_ON(clu < EXFAT_FIRST_CLUSTER);
++ if (!is_valid_cluster(sbi, clu))
++ return;
++
+ ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
+ i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
+ b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx);
+--- a/fs/exfat/exfat_fs.h
++++ b/fs/exfat/exfat_fs.h
+@@ -381,6 +381,12 @@ static inline int exfat_sector_to_cluste
+ EXFAT_RESERVED_CLUSTERS;
+ }
+
++static inline bool is_valid_cluster(struct exfat_sb_info *sbi,
++ unsigned int clus)
++{
++ return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters;
++}
++
+ /* super.c */
+ int exfat_set_volume_dirty(struct super_block *sb);
+ int exfat_clear_volume_dirty(struct super_block *sb);
+--- a/fs/exfat/fatent.c
++++ b/fs/exfat/fatent.c
+@@ -81,12 +81,6 @@ int exfat_ent_set(struct super_block *sb
+ return 0;
+ }
+
+-static inline bool is_valid_cluster(struct exfat_sb_info *sbi,
+- unsigned int clus)
+-{
+- return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters;
+-}
+-
+ int exfat_ent_get(struct super_block *sb, unsigned int loc,
+ unsigned int *content)
+ {
--- /dev/null
+From d8dad2588addd1d861ce19e7df3b702330f0c7e3 Mon Sep 17 00:00:00 2001
+From: Yuezhang Mo <Yuezhang.Mo@sony.com>
+Date: Mon, 4 Apr 2022 11:58:06 +0900
+Subject: exfat: fix referencing wrong parent directory information after renaming
+
+From: Yuezhang Mo <Yuezhang.Mo@sony.com>
+
+commit d8dad2588addd1d861ce19e7df3b702330f0c7e3 upstream.
+
+During renaming, the parent directory information may be
+updated, but the file/directory still references the old
+parent directory information.
+
+This bug will cause 2 problems.
+
+(1) The renamed file can not be written.
+
+ [10768.175172] exFAT-fs (sda1): error, failed to bmap (inode : 7afd50e4 iblock : 0, err : -5)
+ [10768.184285] exFAT-fs (sda1): Filesystem has been set read-only
+ ash: write error: Input/output error
+
+(2) Some dentries of the renamed file/directory are not set
+ to deleted after removing the file/directory.
+
+exfat_update_parent_info() is a workaround for the wrong parent
+directory information being used after renaming. Now that the bug is
+fixed, the workaround is no longer needed, so remove it.
+
+Fixes: 5f2aa075070c ("exfat: add inode operations")
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
+Reviewed-by: Andy Wu <Andy.Wu@sony.com>
+Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
+Reviewed-by: Daniel Palmer <daniel.palmer@sony.com>
+Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exfat/namei.c | 27 +--------------------------
+ 1 file changed, 1 insertion(+), 26 deletions(-)
+
+--- a/fs/exfat/namei.c
++++ b/fs/exfat/namei.c
+@@ -1080,6 +1080,7 @@ static int exfat_rename_file(struct inod
+
+ exfat_remove_entries(inode, p_dir, oldentry, 0,
+ num_old_entries);
++ ei->dir = *p_dir;
+ ei->entry = newentry;
+ } else {
+ if (exfat_get_entry_type(epold) == TYPE_FILE) {
+@@ -1167,28 +1168,6 @@ static int exfat_move_file(struct inode
+ return 0;
+ }
+
+-static void exfat_update_parent_info(struct exfat_inode_info *ei,
+- struct inode *parent_inode)
+-{
+- struct exfat_sb_info *sbi = EXFAT_SB(parent_inode->i_sb);
+- struct exfat_inode_info *parent_ei = EXFAT_I(parent_inode);
+- loff_t parent_isize = i_size_read(parent_inode);
+-
+- /*
+- * the problem that struct exfat_inode_info caches wrong parent info.
+- *
+- * because of flag-mismatch of ei->dir,
+- * there is abnormal traversing cluster chain.
+- */
+- if (unlikely(parent_ei->flags != ei->dir.flags ||
+- parent_isize != EXFAT_CLU_TO_B(ei->dir.size, sbi) ||
+- parent_ei->start_clu != ei->dir.dir)) {
+- exfat_chain_set(&ei->dir, parent_ei->start_clu,
+- EXFAT_B_TO_CLU_ROUND_UP(parent_isize, sbi),
+- parent_ei->flags);
+- }
+-}
+-
+ /* rename or move a old file into a new file */
+ static int __exfat_rename(struct inode *old_parent_inode,
+ struct exfat_inode_info *ei, struct inode *new_parent_inode,
+@@ -1219,8 +1198,6 @@ static int __exfat_rename(struct inode *
+ return -ENOENT;
+ }
+
+- exfat_update_parent_info(ei, old_parent_inode);
+-
+ exfat_chain_dup(&olddir, &ei->dir);
+ dentry = ei->entry;
+
+@@ -1241,8 +1218,6 @@ static int __exfat_rename(struct inode *
+ goto out;
+ }
+
+- exfat_update_parent_info(new_ei, new_parent_inode);
+-
+ p_dir = &(new_ei->dir);
+ new_entry = new_ei->entry;
+ ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh);
--- /dev/null
+From 300981abddcb13f8f06ad58f52358b53a8096775 Mon Sep 17 00:00:00 2001
+From: Xiaomeng Tong <xiam0nd.tong@gmail.com>
+Date: Thu, 14 Apr 2022 14:21:03 +0800
+Subject: KVM: PPC: Book3S HV: fix incorrect NULL check on list iterator
+
+From: Xiaomeng Tong <xiam0nd.tong@gmail.com>
+
+commit 300981abddcb13f8f06ad58f52358b53a8096775 upstream.
+
+The bug is here:
+ if (!p)
+ return ret;
+
+The list iterator value 'p' will *always* be set and non-NULL by
+list_for_each_entry(), so it is incorrect to assume that the iterator
+value will be NULL if the list is empty or no element is found.
+
+To fix the bug, use a new variable 'iter' as the list iterator, and use
+the old variable 'p' as a dedicated pointer to the found element.
+
+Fixes: dfaa973ae960 ("KVM: PPC: Book3S HV: In H_SVM_INIT_DONE, migrate remaining normal-GFNs to secure-GFNs")
+Cc: stable@vger.kernel.org # v5.9+
+Signed-off-by: Xiaomeng Tong <xiam0nd.tong@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220414062103.8153-1-xiam0nd.tong@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kvm/book3s_hv_uvmem.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
++++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
+@@ -361,13 +361,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsi
+ static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
+ struct kvm *kvm, unsigned long *gfn)
+ {
+- struct kvmppc_uvmem_slot *p;
++ struct kvmppc_uvmem_slot *p = NULL, *iter;
+ bool ret = false;
+ unsigned long i;
+
+- list_for_each_entry(p, &kvm->arch.uvmem_pfns, list)
+- if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns)
++ list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list)
++ if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) {
++ p = iter;
+ break;
++ }
+ if (!p)
+ return ret;
+ /*
--- /dev/null
+From d22d2474e3953996f03528b84b7f52cc26a39403 Mon Sep 17 00:00:00 2001
+From: Ashish Kalra <ashish.kalra@amd.com>
+Date: Mon, 16 May 2022 15:43:10 +0000
+Subject: KVM: SVM: Use kzalloc for sev ioctl interfaces to prevent kernel data leak
+
+From: Ashish Kalra <ashish.kalra@amd.com>
+
+commit d22d2474e3953996f03528b84b7f52cc26a39403 upstream.
+
+For some sev ioctl interfaces, the length parameter that is passed may be
+less than or equal to SEV_FW_BLOB_MAX_SIZE, but larger than the data
+that the PSP firmware returns. In this case, kmalloc will allocate memory
+that is the size of the input rather than the size of the data.
+Since the PSP firmware doesn't fully overwrite the allocated buffer, these
+sev ioctl interfaces may return uninitialized kernel slab memory.
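+
+The problematic pattern, reduced to its essence (a sketch of the general
+shape, not the exact SEV code):
+
+	/* params.len comes from userspace, up to SEV_FW_BLOB_MAX_SIZE */
+	blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
+	if (!blob)
+		return -ENOMEM;
+
+	/* ... the PSP may fill in fewer than params.len bytes of blob ... */
+
+	/* any bytes the firmware did not write are copied out as-is */
+	if (copy_to_user(p, blob, params.len))
+		ret = -EFAULT;
+
+Allocating with kzalloc() (and __GFP_ZERO for the bounce page in
+__sev_dbg_decrypt_user()) guarantees that the unwritten tail is zeroed
+rather than stale slab contents.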
+
+Reported-by: Andy Nguyen <theflow@google.com>
+Suggested-by: David Rientjes <rientjes@google.com>
+Suggested-by: Peter Gonda <pgonda@google.com>
+Cc: kvm@vger.kernel.org
+Cc: stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Fixes: eaf78265a4ab3 ("KVM: SVM: Move SEV code to separate file")
+Fixes: 2c07ded06427d ("KVM: SVM: add support for SEV attestation command")
+Fixes: 4cfdd47d6d95a ("KVM: SVM: Add KVM_SEV SEND_START command")
+Fixes: d3d1af85e2c75 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command")
+Fixes: eba04b20e4861 ("KVM: x86: Account a variety of miscellaneous allocations")
+Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
+Reviewed-by: Peter Gonda <pgonda@google.com>
+Message-Id: <20220516154310.3685678-1-Ashish.Kalra@amd.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -688,7 +688,7 @@ static int sev_launch_measure(struct kvm
+ if (params.len > SEV_FW_BLOB_MAX_SIZE)
+ return -EINVAL;
+
+- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
++ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
+ if (!blob)
+ return -ENOMEM;
+
+@@ -808,7 +808,7 @@ static int __sev_dbg_decrypt_user(struct
+ if (!IS_ALIGNED(dst_paddr, 16) ||
+ !IS_ALIGNED(paddr, 16) ||
+ !IS_ALIGNED(size, 16)) {
+- tpage = (void *)alloc_page(GFP_KERNEL);
++ tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!tpage)
+ return -ENOMEM;
+
+@@ -1094,7 +1094,7 @@ static int sev_get_attestation_report(st
+ if (params.len > SEV_FW_BLOB_MAX_SIZE)
+ return -EINVAL;
+
+- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
++ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT);
+ if (!blob)
+ return -ENOMEM;
+
+@@ -1176,7 +1176,7 @@ static int sev_send_start(struct kvm *kv
+ return -EINVAL;
+
+ /* allocate the memory to hold the session data blob */
+- session_data = kmalloc(params.session_len, GFP_KERNEL_ACCOUNT);
++ session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT);
+ if (!session_data)
+ return -ENOMEM;
+
+@@ -1300,11 +1300,11 @@ static int sev_send_update_data(struct k
+
+ /* allocate memory for header and transport buffer */
+ ret = -ENOMEM;
+- hdr = kmalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
++ hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
+ if (!hdr)
+ goto e_unpin;
+
+- trans_data = kmalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
++ trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
+ if (!trans_data)
+ goto e_free_hdr;
+
--- /dev/null
+From fee060cd52d69c114b62d1a2948ea9648b5131f9 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 11 Mar 2022 03:27:41 +0000
+Subject: KVM: x86: avoid calling x86 emulator without a decoded instruction
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit fee060cd52d69c114b62d1a2948ea9648b5131f9 upstream.
+
+Whenever x86_decode_emulated_instruction() detects a breakpoint, it
+returns the value that kvm_vcpu_check_breakpoint() writes into its
+pass-by-reference second argument. Unfortunately this is completely
+bogus because the expected outcome of x86_decode_emulated_instruction
+is an EMULATION_* value.
+
+Then, if kvm_vcpu_check_breakpoint() does "*r = 0" (corresponding to
+a KVM_EXIT_DEBUG userspace exit), it is misunderstood as EMULATION_OK
+and x86_emulate_instruction() is called without having decoded the
+instruction. This causes various havoc from running with a stale
+emulation context.
+
+The fix is to move the call to kvm_vcpu_check_breakpoint() where it was
+before commit 4aa2691dcbd3 ("KVM: x86: Factor out x86 instruction
+emulation with decoding") introduced x86_decode_emulated_instruction().
+The other caller of the function does not need breakpoint checks,
+because it is invoked as part of a vmexit and the processor has already
+checked those before executing the instruction that #GP'd.
+
+This fixes CVE-2022-1852.
+
+Reported-by: Qiuhao Li <qiuhao@sysec.org>
+Reported-by: Gaoning Pan <pgn@zju.edu.cn>
+Reported-by: Yongkang Jia <kangel@zju.edu.cn>
+Fixes: 4aa2691dcbd3 ("KVM: x86: Factor out x86 instruction emulation with decoding")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220311032801.3467418-2-seanjc@google.com>
+[Rewrote commit message according to Qiuhao's report, since a patch
+ already existed to fix the bug. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 31 +++++++++++++++++++------------
+ 1 file changed, 19 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8244,7 +8244,7 @@ int kvm_skip_emulated_instruction(struct
+ }
+ EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
+
+-static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
++static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r)
+ {
+ if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
+ (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
+@@ -8313,25 +8313,23 @@ static bool is_vmware_backdoor_opcode(st
+ }
+
+ /*
+- * Decode to be emulated instruction. Return EMULATION_OK if success.
++ * Decode an instruction for emulation. The caller is responsible for handling
++ * code breakpoints. Note, manually detecting code breakpoints is unnecessary
++ * (and wrong) when emulating on an intercepted fault-like exception[*], as
++ * code breakpoints have higher priority and thus have already been done by
++ * hardware.
++ *
++ * [*] Except #MC, which is higher priority, but KVM should never emulate in
++ * response to a machine check.
+ */
+ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
+ void *insn, int insn_len)
+ {
+- int r = EMULATION_OK;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
++ int r;
+
+ init_emulate_ctxt(vcpu);
+
+- /*
+- * We will reenter on the same instruction since we do not set
+- * complete_userspace_io. This does not handle watchpoints yet,
+- * those would be handled in the emulate_ops.
+- */
+- if (!(emulation_type & EMULTYPE_SKIP) &&
+- kvm_vcpu_check_breakpoint(vcpu, &r))
+- return r;
+-
+ r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
+
+ trace_kvm_emulate_insn_start(vcpu);
+@@ -8364,6 +8362,15 @@ int x86_emulate_instruction(struct kvm_v
+ if (!(emulation_type & EMULTYPE_NO_DECODE)) {
+ kvm_clear_exception_queue(vcpu);
+
++ /*
++ * Return immediately if RIP hits a code breakpoint, such #DBs
++ * are fault-like and are higher priority than any faults on
++ * the code fetch itself.
++ */
++ if (!(emulation_type & EMULTYPE_SKIP) &&
++ kvm_vcpu_check_code_breakpoint(vcpu, &r))
++ return r;
++
+ r = x86_decode_emulated_instruction(vcpu, emulation_type,
+ insn, insn_len);
+ if (r != EMULATION_OK) {
--- /dev/null
+From 6fcee03df6a1a3101a77344be37bb85c6142d56c Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Tue, 22 Mar 2022 19:24:42 +0200
+Subject: KVM: x86: avoid loading a vCPU after .vm_destroy was called
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit 6fcee03df6a1a3101a77344be37bb85c6142d56c upstream.
+
+This can cause various unexpected issues, since the VM is partially
+destroyed at that point.
+
+For example, when AVIC is enabled, this causes avic_vcpu_load to
+access a physical id page entry which was already freed by .vm_destroy.
+
+Fixes: 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC")
+Cc: stable@vger.kernel.org
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20220322172449.235575-2-mlevitsk@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -11747,20 +11747,15 @@ static void kvm_unload_vcpu_mmu(struct k
+ vcpu_put(vcpu);
+ }
+
+-static void kvm_free_vcpus(struct kvm *kvm)
++static void kvm_unload_vcpu_mmus(struct kvm *kvm)
+ {
+ unsigned long i;
+ struct kvm_vcpu *vcpu;
+
+- /*
+- * Unpin any mmu pages first.
+- */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvm_clear_async_pf_completion_queue(vcpu);
+ kvm_unload_vcpu_mmu(vcpu);
+ }
+-
+- kvm_destroy_vcpus(kvm);
+ }
+
+ void kvm_arch_sync_events(struct kvm *kvm)
+@@ -11866,11 +11861,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm
+ __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
+ mutex_unlock(&kvm->slots_lock);
+ }
++ kvm_unload_vcpu_mmus(kvm);
+ static_call_cond(kvm_x86_vm_destroy)(kvm);
+ kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
+ kvm_pic_destroy(kvm);
+ kvm_ioapic_destroy(kvm);
+- kvm_free_vcpus(kvm);
++ kvm_destroy_vcpus(kvm);
+ kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
+ kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
+ kvm_mmu_uninit_vm(kvm);
--- /dev/null
+From 45846661d10422ce9e22da21f8277540b29eca22 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 7 Apr 2022 00:23:13 +0000
+Subject: KVM: x86: Drop WARNs that assert a triple fault never "escapes" from L2
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 45846661d10422ce9e22da21f8277540b29eca22 upstream.
+
+Remove WARNs that sanity check that KVM never lets a triple fault for L2
+escape and incorrectly end up in L1. In normal operation, the sanity
+check is perfectly valid, but it incorrectly assumes that it's impossible
+for userspace to induce KVM_REQ_TRIPLE_FAULT without bouncing through
+KVM_RUN (which guarantees kvm_check_nested_state() will see and handle
+the triple fault).
+
+The WARN can currently be triggered if userspace injects a machine check
+while L2 is active and CR4.MCE=0. And a future fix to allow save/restore
+of KVM_REQ_TRIPLE_FAULT, e.g. so that a synthesized triple fault isn't
+lost on migration, will make it trivially easy for userspace to trigger
+the WARN.
+
+Clearing KVM_REQ_TRIPLE_FAULT when forcibly leaving guest mode is
+tempting, but wrong, especially if/when the request is saved/restored,
+e.g. if userspace restores events (including a triple fault) and then
+restores nested state (which may forcibly leave guest mode). Ignoring
+the fact that KVM doesn't currently provide the necessary APIs, it's
+userspace's responsibility to manage pending events during save/restore.
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 7 PID: 1399 at arch/x86/kvm/vmx/nested.c:4522 nested_vmx_vmexit+0x7fe/0xd90 [kvm_intel]
+ Modules linked in: kvm_intel kvm irqbypass
+ CPU: 7 PID: 1399 Comm: state_test Not tainted 5.17.0-rc3+ #808
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+ RIP: 0010:nested_vmx_vmexit+0x7fe/0xd90 [kvm_intel]
+ Call Trace:
+ <TASK>
+ vmx_leave_nested+0x30/0x40 [kvm_intel]
+ vmx_set_nested_state+0xca/0x3e0 [kvm_intel]
+ kvm_arch_vcpu_ioctl+0xf49/0x13e0 [kvm]
+ kvm_vcpu_ioctl+0x4b9/0x660 [kvm]
+ __x64_sys_ioctl+0x83/0xb0
+ do_syscall_64+0x3b/0xc0
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+ </TASK>
+ ---[ end trace 0000000000000000 ]---
+
+Fixes: cb6a32c2b877 ("KVM: x86: Handle triple fault in L2 without killing L1")
+Cc: stable@vger.kernel.org
+Cc: Chenyi Qiang <chenyi.qiang@intel.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220407002315.78092-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 3 ---
+ arch/x86/kvm/vmx/nested.c | 3 ---
+ 2 files changed, 6 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -819,9 +819,6 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ struct kvm_host_map map;
+ int rc;
+
+- /* Triple faults in L2 should never escape. */
+- WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
+-
+ rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
+ if (rc) {
+ if (rc == -EINVAL)
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -4518,9 +4518,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *
+ /* trying to cancel vmlaunch/vmresume is a bug */
+ WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
+- /* Similarly, triple faults in L2 should never escape. */
+- WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
+-
+ if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
+ /*
+ * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
--- /dev/null
+From ffd1925a596ce68bed7d81c61cb64bc35f788a9d Mon Sep 17 00:00:00 2001
+From: Yanfei Xu <yanfei.xu@intel.com>
+Date: Mon, 23 May 2022 22:08:21 +0800
+Subject: KVM: x86: Fix the intel_pt PMI handling wrongly considered from guest
+
+From: Yanfei Xu <yanfei.xu@intel.com>
+
+commit ffd1925a596ce68bed7d81c61cb64bc35f788a9d upstream.
+
+When the kernel handles a VM-exit caused by an external interrupt or an
+NMI, it always sets kvm_intr_type to record whether it is dealing with
+an IRQ or an NMI. For the PMI scenario, it could be either.
+
+However, intel_pt PMIs are only generated for HARDWARE perf events, and
+HARDWARE events are always configured to generate NMIs. Use
+kvm_handling_nmi_from_guest() to precisely identify if the intel_pt PMI
+came from the guest; this avoids false positives if an intel_pt PMI/NMI
+arrives while the host is handling an unrelated IRQ VM-Exit.
+
+Fixes: db215756ae59 ("KVM: x86: More precisely identify NMI from guest when handling PMI")
+Signed-off-by: Yanfei Xu <yanfei.xu@intel.com>
+Message-Id: <20220523140821.1345605-1-yanfei.xu@intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -7856,7 +7856,7 @@ static unsigned int vmx_handle_intel_pt_
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+
+ /* '0' on failure so that the !PT case can use a RET0 static call. */
+- if (!kvm_arch_pmi_in_guest(vcpu))
++ if (!vcpu || !kvm_handling_nmi_from_guest(vcpu))
+ return 0;
+
+ kvm_make_request(KVM_REQ_PMI, vcpu);
--- /dev/null
+From 33fbe6befa622c082f7d417896832856814bdde0 Mon Sep 17 00:00:00 2001
+From: Maxim Levitsky <mlevitsk@redhat.com>
+Date: Thu, 12 May 2022 13:14:20 +0300
+Subject: KVM: x86: fix typo in __try_cmpxchg_user causing non-atomicness
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+commit 33fbe6befa622c082f7d417896832856814bdde0 upstream.
+
+This shows up as a TDP MMU leak when running nested. A non-working cmpxchg
+on L0 makes L1 install two different shadow pages under the same spte, and
+one of them is leaked.
+
+Fixes: 1c2361f667f36 ("KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses")
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20220512101420.306759-1-mlevitsk@redhat.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -7268,7 +7268,7 @@ static int emulator_cmpxchg_emulated(str
+ goto emul_write;
+
+ hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa));
+- if (kvm_is_error_hva(addr))
++ if (kvm_is_error_hva(hva))
+ goto emul_write;
+
+ hva += offset_in_page(gpa);
--- /dev/null
+From 8d5678a76689acbf91245a3791fe853ab773090f Mon Sep 17 00:00:00 2001
+From: Hou Wenlong <houwenlong.hwl@antgroup.com>
+Date: Tue, 15 Mar 2022 17:35:13 +0800
+Subject: KVM: x86/mmu: Don't rebuild page when the page is synced and no tlb flushing is required
+
+From: Hou Wenlong <houwenlong.hwl@antgroup.com>
+
+commit 8d5678a76689acbf91245a3791fe853ab773090f upstream.
+
+Before commit c3e5e415bc1e6 ("KVM: X86: Change kvm_sync_page()
+to return true when remote flush is needed"), the return value
+of kvm_sync_page() indicated whether the page was synced, and
+kvm_mmu_get_page() would rebuild the page when the sync failed.
+But now, kvm_sync_page() returns false when the page is synced
+and no TLB flushing is required, which leads to the page being
+rebuilt in kvm_mmu_get_page(). So return the return value of
+mmu->sync_page() directly and check it in kvm_mmu_get_page().
+If the sync fails, the page will be zapped and the invalid_list
+will not be empty, so setting flush to true is acceptable in
+mmu_sync_children().
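+
+In other words, the return value of kvm_sync_page() (mirroring
+mmu->sync_page()) is now interpreted as follows:
+
+ ret < 0:  the sync failed, the page has been zapped and must be rebuilt
+ ret == 0: the page is synced and no remote TLB flush is required
+ ret > 0:  the page is synced and a remote TLB flush is required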
+
+Cc: stable@vger.kernel.org
+Fixes: c3e5e415bc1e6 ("KVM: X86: Change kvm_sync_page() to return true when remote flush is needed")
+Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
+Acked-by: Lai Jiangshan <jiangshanlai@gmail.com>
+Message-Id: <0dabeeb789f57b0d793f85d073893063e692032d.1647336064.git.houwenlong.hwl@antgroup.com>
+[mmu_sync_children should not flush if the page is zapped. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -1843,17 +1843,14 @@ static void kvm_mmu_commit_zap_page(stru
+ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \
+ if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
+
+-static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
++static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ struct list_head *invalid_list)
+ {
+ int ret = vcpu->arch.mmu->sync_page(vcpu, sp);
+
+- if (ret < 0) {
++ if (ret < 0)
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
+- return false;
+- }
+-
+- return !!ret;
++ return ret;
+ }
+
+ static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm,
+@@ -1975,7 +1972,7 @@ static int mmu_sync_children(struct kvm_
+
+ for_each_sp(pages, sp, parents, i) {
+ kvm_unlink_unsync_page(vcpu->kvm, sp);
+- flush |= kvm_sync_page(vcpu, sp, &invalid_list);
++ flush |= kvm_sync_page(vcpu, sp, &invalid_list) > 0;
+ mmu_pages_clear_parents(&parents);
+ }
+ if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
+@@ -2016,6 +2013,7 @@ static struct kvm_mmu_page *kvm_mmu_get_
+ struct hlist_head *sp_list;
+ unsigned quadrant;
+ struct kvm_mmu_page *sp;
++ int ret;
+ int collisions = 0;
+ LIST_HEAD(invalid_list);
+
+@@ -2068,11 +2066,13 @@ static struct kvm_mmu_page *kvm_mmu_get_
+ * If the sync fails, the page is zapped. If so, break
+ * in order to rebuild it.
+ */
+- if (!kvm_sync_page(vcpu, sp, &invalid_list))
++ ret = kvm_sync_page(vcpu, sp, &invalid_list);
++ if (ret < 0)
+ break;
+
+ WARN_ON(!list_empty(&invalid_list));
+- kvm_flush_remote_tlbs(vcpu->kvm);
++ if (ret > 0)
++ kvm_flush_remote_tlbs(vcpu->kvm);
+ }
+
+ __clear_sp_write_flooding_count(sp);
--- /dev/null
+From 1c2361f667f3648855ceae25f1332c18413fdb9f Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 2 Feb 2022 00:49:44 +0000
+Subject: KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 1c2361f667f3648855ceae25f1332c18413fdb9f upstream.
+
+Use the recently introduce __try_cmpxchg_user() to emulate atomic guest
+accesses via the associated userspace address instead of mapping the
+backing pfn into kernel address space. Using kvm_vcpu_map() is unsafe as
+it does not coordinate with KVM's mmu_notifier to ensure the hva=>pfn
+translation isn't changed/unmapped in the memremap() path, i.e. when
+there's no struct page and thus no elevated refcount.
+
+Fixes: 42e35f8072c3 ("KVM/X86: Use kvm_vcpu_map in emulator_cmpxchg_emulated")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220202004945.2540433-5-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 35 ++++++++++++++---------------------
+ 1 file changed, 14 insertions(+), 21 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -7229,15 +7229,8 @@ static int emulator_write_emulated(struc
+ exception, &write_emultor);
+ }
+
+-#define CMPXCHG_TYPE(t, ptr, old, new) \
+- (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
+-
+-#ifdef CONFIG_X86_64
+-# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
+-#else
+-# define CMPXCHG64(ptr, old, new) \
+- (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
+-#endif
++#define emulator_try_cmpxchg_user(t, ptr, old, new) \
++ (__try_cmpxchg_user((t __user *)(ptr), (t *)(old), *(t *)(new), efault ## t))
+
+ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr,
+@@ -7246,12 +7239,11 @@ static int emulator_cmpxchg_emulated(str
+ unsigned int bytes,
+ struct x86_exception *exception)
+ {
+- struct kvm_host_map map;
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ u64 page_line_mask;
++ unsigned long hva;
+ gpa_t gpa;
+- char *kaddr;
+- bool exchanged;
++ int r;
+
+ /* guests cmpxchg8b have to be emulated atomically */
+ if (bytes > 8 || (bytes & (bytes - 1)))
+@@ -7275,31 +7267,32 @@ static int emulator_cmpxchg_emulated(str
+ if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
+ goto emul_write;
+
+- if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
++ hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa));
++ if (kvm_is_error_hva(addr))
+ goto emul_write;
+
+- kaddr = map.hva + offset_in_page(gpa);
++ hva += offset_in_page(gpa);
+
+ switch (bytes) {
+ case 1:
+- exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
++ r = emulator_try_cmpxchg_user(u8, hva, old, new);
+ break;
+ case 2:
+- exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
++ r = emulator_try_cmpxchg_user(u16, hva, old, new);
+ break;
+ case 4:
+- exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
++ r = emulator_try_cmpxchg_user(u32, hva, old, new);
+ break;
+ case 8:
+- exchanged = CMPXCHG64(kaddr, old, new);
++ r = emulator_try_cmpxchg_user(u64, hva, old, new);
+ break;
+ default:
+ BUG();
+ }
+
+- kvm_vcpu_unmap(vcpu, &map, true);
+-
+- if (!exchanged)
++ if (r < 0)
++ goto emul_write;
++ if (r)
+ return X86EMUL_CMPXCHG_FAILED;
+
+ kvm_page_track_write(vcpu, gpa, new, bytes);
--- /dev/null
+From f122dfe4476890d60b8c679128cd2259ec96a24c Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 2 Feb 2022 00:49:43 +0000
+Subject: KVM: x86: Use __try_cmpxchg_user() to update guest PTE A/D bits
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f122dfe4476890d60b8c679128cd2259ec96a24c upstream.
+
+Use the recently introduced __try_cmpxchg_user() to update guest PTE A/D
+bits instead of mapping the PTE into kernel address space. The VM_PFNMAP
+path is broken as it assumes that vm_pgoff is the base pfn of the mapped
+VMA range, which is conceptually wrong as vm_pgoff is the offset relative
+to the file and has nothing to do with the pfn. The horrific hack worked
+for the original use case (backing guest memory with /dev/mem), but leads
+to accessing "random" pfns for pretty much any other VM_PFNMAP case.
+
+Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs")
+Debugged-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Tested-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220202004945.2540433-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/paging_tmpl.h | 38 +-------------------------------------
+ 1 file changed, 1 insertion(+), 37 deletions(-)
+
+--- a/arch/x86/kvm/mmu/paging_tmpl.h
++++ b/arch/x86/kvm/mmu/paging_tmpl.h
+@@ -144,42 +144,6 @@ static bool FNAME(is_rsvd_bits_set)(stru
+ FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte);
+ }
+
+-static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+- pt_element_t __user *ptep_user, unsigned index,
+- pt_element_t orig_pte, pt_element_t new_pte)
+-{
+- signed char r;
+-
+- if (!user_access_begin(ptep_user, sizeof(pt_element_t)))
+- return -EFAULT;
+-
+-#ifdef CMPXCHG
+- asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n"
+- "setnz %b[r]\n"
+- "2:"
+- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r])
+- : [ptr] "+m" (*ptep_user),
+- [old] "+a" (orig_pte),
+- [r] "=q" (r)
+- : [new] "r" (new_pte)
+- : "memory");
+-#else
+- asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n"
+- "setnz %b[r]\n"
+- "2:"
+- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r])
+- : [ptr] "+m" (*ptep_user),
+- [old] "+A" (orig_pte),
+- [r] "=q" (r)
+- : [new_lo] "b" ((u32)new_pte),
+- [new_hi] "c" ((u32)(new_pte >> 32))
+- : "memory");
+-#endif
+-
+- user_access_end();
+- return r;
+-}
+-
+ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp, u64 *spte,
+ u64 gpte)
+@@ -278,7 +242,7 @@ static int FNAME(update_accessed_dirty_b
+ if (unlikely(!walker->pte_writable[level - 1]))
+ continue;
+
+- ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
++ ret = __try_cmpxchg_user(ptep_user, &orig_pte, pte, fault);
+ if (ret)
+ return ret;
+
--- /dev/null
+From c5794097b269f15961ed78f7f27b50e51766dec9 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Thu, 21 Apr 2022 13:53:33 -0500
+Subject: net: ipa: compute proper aggregation limit
+
+From: Alex Elder <elder@linaro.org>
+
+commit c5794097b269f15961ed78f7f27b50e51766dec9 upstream.
+
+The aggregation byte limit for an endpoint is currently computed
+based on the endpoint's receive buffer size.
+
+However, some bytes at the front of each receive buffer are reserved
+on the assumption that--as with SKBs--it might be useful to insert
+data (such as headers) before what lands in the buffer.
+
+The aggregation byte limit currently doesn't take into account that
+reserved space, and as a result, aggregation could require space
+past that which is available in the buffer.
+
+Fix this by reducing the size used to compute the aggregation byte
+limit by the NET_SKB_PAD offset reserved for each receive buffer.
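+
+For example (illustrative numbers): with an 8192-byte receive buffer and
+a typical NET_SKB_PAD of 64 bytes, only 8128 bytes are actually available
+for received data. Computing the aggregation limit from the full 8192
+bytes could let the hardware aggregate past the end of that usable space,
+so the limit must be derived from buffer_size - NET_SKB_PAD instead.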
+
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -130,9 +130,10 @@ static bool ipa_endpoint_data_valid_one(
+ */
+ if (data->endpoint.config.aggregation) {
+ limit += SZ_1K * aggr_byte_limit_max(ipa->version);
+- if (buffer_size > limit) {
++ if (buffer_size - NET_SKB_PAD > limit) {
+ dev_err(dev, "RX buffer size too large for aggregated RX endpoint %u (%u > %u)\n",
+- data->endpoint_id, buffer_size, limit);
++ data->endpoint_id,
++ buffer_size - NET_SKB_PAD, limit);
+
+ return false;
+ }
+@@ -739,6 +740,7 @@ static void ipa_endpoint_init_aggr(struc
+ if (endpoint->data->aggregation) {
+ if (!endpoint->toward_ipa) {
+ const struct ipa_endpoint_rx_data *rx_data;
++ u32 buffer_size;
+ bool close_eof;
+ u32 limit;
+
+@@ -746,7 +748,8 @@ static void ipa_endpoint_init_aggr(struc
+ val |= u32_encode_bits(IPA_ENABLE_AGGR, AGGR_EN_FMASK);
+ val |= u32_encode_bits(IPA_GENERIC, AGGR_TYPE_FMASK);
+
+- limit = ipa_aggr_size_kb(rx_data->buffer_size);
++ buffer_size = rx_data->buffer_size;
++ limit = ipa_aggr_size_kb(buffer_size - NET_SKB_PAD);
+ val |= aggr_byte_limit_encoded(version, limit);
+
+ limit = IPA_AGGR_TIME_LIMIT;
--- /dev/null
+From 56b14ecec97f39118bf85c9ac2438c5a949509ed Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 20 May 2022 00:02:04 +0200
+Subject: netfilter: conntrack: re-fetch conntrack after insertion
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 56b14ecec97f39118bf85c9ac2438c5a949509ed upstream.
+
+In case the conntrack is clashing, insertion can free skb->_nfct and
+set skb->_nfct to the already-confirmed entry. The conntrack pointer
+fetched before confirmation may therefore be stale and must be
+re-fetched from the skb before delivering cached events.
+
+This wasn't found before because the conntrack entry and the extension
+space used to be freed after an RCU grace period, plus the race needs
+events enabled to trigger.
+
+Reported-by: <syzbot+793a590957d9c1b96620@syzkaller.appspotmail.com>
+Fixes: 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race")
+Fixes: 2ad9d7747c10 ("netfilter: conntrack: free extension area immediately")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_conntrack_core.h | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/include/net/netfilter/nf_conntrack_core.h
++++ b/include/net/netfilter/nf_conntrack_core.h
+@@ -58,8 +58,13 @@ static inline int nf_conntrack_confirm(s
+ int ret = NF_ACCEPT;
+
+ if (ct) {
+- if (!nf_ct_is_confirmed(ct))
++ if (!nf_ct_is_confirmed(ct)) {
+ ret = __nf_conntrack_confirm(skb);
++
++ if (ret == NF_ACCEPT)
++ ct = (struct nf_conn *)skb_nfct(skb);
++ }
++
+ if (likely(ret == NF_ACCEPT))
+ nf_ct_deliver_cached_events(ct);
+ }
--- /dev/null
+From f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Mon, 30 May 2022 18:24:06 +0200
+Subject: netfilter: nf_tables: double hook unregistration in netns path
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 upstream.
+
+__nft_release_hooks() is called from the netns pre_exit path, which
+unregisters the hooks; then the NETDEV_UNREGISTER event is triggered,
+which unregisters the hooks again.
+
+[ 565.221461] WARNING: CPU: 18 PID: 193 at net/netfilter/core.c:495 __nf_unregister_net_hook+0x247/0x270
+[...]
+[ 565.246890] CPU: 18 PID: 193 Comm: kworker/u64:1 Tainted: G E 5.18.0-rc7+ #27
+[ 565.253682] Workqueue: netns cleanup_net
+[ 565.257059] RIP: 0010:__nf_unregister_net_hook+0x247/0x270
+[...]
+[ 565.297120] Call Trace:
+[ 565.300900] <TASK>
+[ 565.304683] nf_tables_flowtable_event+0x16a/0x220 [nf_tables]
+[ 565.308518] raw_notifier_call_chain+0x63/0x80
+[ 565.312386] unregister_netdevice_many+0x54f/0xb50
+
+Unregister and destroy the netdev hooks from netns pre_exit via kfree_rcu
+so the NETDEV_UNREGISTER path sees the hooks as already unregistered.
+
+Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 54 +++++++++++++++++++++++++++++++-----------
+ 1 file changed, 41 insertions(+), 13 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -222,12 +222,18 @@ err_register:
+ }
+
+ static void nft_netdev_unregister_hooks(struct net *net,
+- struct list_head *hook_list)
++ struct list_head *hook_list,
++ bool release_netdev)
+ {
+- struct nft_hook *hook;
++ struct nft_hook *hook, *next;
+
+- list_for_each_entry(hook, hook_list, list)
++ list_for_each_entry_safe(hook, next, hook_list, list) {
+ nf_unregister_net_hook(net, &hook->ops);
++ if (release_netdev) {
++ list_del(&hook->list);
++ kfree_rcu(hook, rcu);
++ }
++ }
+ }
+
+ static int nf_tables_register_hook(struct net *net,
+@@ -253,9 +259,10 @@ static int nf_tables_register_hook(struc
+ return nf_register_net_hook(net, &basechain->ops);
+ }
+
+-static void nf_tables_unregister_hook(struct net *net,
+- const struct nft_table *table,
+- struct nft_chain *chain)
++static void __nf_tables_unregister_hook(struct net *net,
++ const struct nft_table *table,
++ struct nft_chain *chain,
++ bool release_netdev)
+ {
+ struct nft_base_chain *basechain;
+ const struct nf_hook_ops *ops;
+@@ -270,11 +277,19 @@ static void nf_tables_unregister_hook(st
+ return basechain->type->ops_unregister(net, ops);
+
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
+- nft_netdev_unregister_hooks(net, &basechain->hook_list);
++ nft_netdev_unregister_hooks(net, &basechain->hook_list,
++ release_netdev);
+ else
+ nf_unregister_net_hook(net, &basechain->ops);
+ }
+
++static void nf_tables_unregister_hook(struct net *net,
++ const struct nft_table *table,
++ struct nft_chain *chain)
++{
++ return __nf_tables_unregister_hook(net, table, chain, false);
++}
++
+ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
+ {
+ struct nftables_pernet *nft_net = nft_pernet(net);
+@@ -7301,13 +7316,25 @@ static void nft_unregister_flowtable_hoo
+ FLOW_BLOCK_UNBIND);
+ }
+
+-static void nft_unregister_flowtable_net_hooks(struct net *net,
+- struct list_head *hook_list)
++static void __nft_unregister_flowtable_net_hooks(struct net *net,
++ struct list_head *hook_list,
++ bool release_netdev)
+ {
+- struct nft_hook *hook;
++ struct nft_hook *hook, *next;
+
+- list_for_each_entry(hook, hook_list, list)
++ list_for_each_entry_safe(hook, next, hook_list, list) {
+ nf_unregister_net_hook(net, &hook->ops);
++ if (release_netdev) {
++ list_del(&hook->list);
++ kfree_rcu(hook);
++ }
++ }
++}
++
++static void nft_unregister_flowtable_net_hooks(struct net *net,
++ struct list_head *hook_list)
++{
++ __nft_unregister_flowtable_net_hooks(net, hook_list, false);
+ }
+
+ static int nft_register_flowtable_net_hooks(struct net *net,
+@@ -9751,9 +9778,10 @@ static void __nft_release_hook(struct ne
+ struct nft_chain *chain;
+
+ list_for_each_entry(chain, &table->chains, list)
+- nf_tables_unregister_hook(net, table, chain);
++ __nf_tables_unregister_hook(net, table, chain, true);
+ list_for_each_entry(flowtable, &table->flowtables, list)
+- nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list);
++ __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list,
++ true);
+ }
+
+ static void __nft_release_hooks(struct net *net)
--- /dev/null
+From 3923b1e4406680d57da7e873da77b1683035d83f Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Mon, 30 May 2022 18:24:05 +0200
+Subject: netfilter: nf_tables: hold mutex on netns pre_exit path
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 3923b1e4406680d57da7e873da77b1683035d83f upstream.
+
+clean_net() runs in a workqueue while walking over the lists, so grab
+the commit mutex.
+
+Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9892,7 +9892,11 @@ static int __net_init nf_tables_init_net
+
+ static void __net_exit nf_tables_pre_exit_net(struct net *net)
+ {
++ struct nftables_pernet *nft_net = nft_pernet(net);
++
++ mutex_lock(&nft_net->commit_mutex);
+ __nft_release_hooks(net);
++ mutex_unlock(&nft_net->commit_mutex);
+ }
+
+ static void __net_exit nf_tables_exit_net(struct net *net)
--- /dev/null
+From fecf31ee395b0295f2d7260aa29946b7605f7c85 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Fri, 27 May 2022 09:56:18 +0200
+Subject: netfilter: nf_tables: sanitize nft_set_desc_concat_parse()
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit fecf31ee395b0295f2d7260aa29946b7605f7c85 upstream.
+
+Add several sanity checks for nft_set_desc_concat_parse():
+
+- validate that desc->field_count does not exceed the size of the
+  desc->field_len array.
+- a field length cannot be zero or larger than what a desc->field_len
+  entry can hold (i.e. U8_MAX).
+- the total length of the concatenation cannot be larger than the
+  register array (see the example below).
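+
+For example (illustrative numbers, not taken from a real ruleset): a
+concatenation of five ranged fields of 64 bytes each would need
+DIV_ROUND_UP(64, sizeof(u32)) = 16 registers per field, 80 32-bit
+registers in total, far more than the register array provides, so such
+a description is now rejected with -E2BIG.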
+
+Joint work with Florian Westphal.
+
+Fixes: f3a2181e16f1 ("netfilter: nf_tables: Support for sets with multiple ranged fields")
+Reported-by: <zhangziming.zzm@antgroup.com>
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4246,6 +4246,9 @@ static int nft_set_desc_concat_parse(con
+ u32 len;
+ int err;
+
++ if (desc->field_count >= ARRAY_SIZE(desc->field_len))
++ return -E2BIG;
++
+ err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr,
+ nft_concat_policy, NULL);
+ if (err < 0)
+@@ -4255,9 +4258,8 @@ static int nft_set_desc_concat_parse(con
+ return -EINVAL;
+
+ len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN]));
+-
+- if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT)
+- return -E2BIG;
++ if (!len || len > U8_MAX)
++ return -EINVAL;
+
+ desc->field_len[desc->field_count++] = len;
+
+@@ -4268,7 +4270,8 @@ static int nft_set_desc_concat(struct nf
+ const struct nlattr *nla)
+ {
+ struct nlattr *attr;
+- int rem, err;
++ u32 num_regs = 0;
++ int rem, err, i;
+
+ nla_for_each_nested(attr, nla, rem) {
+ if (nla_type(attr) != NFTA_LIST_ELEM)
+@@ -4279,6 +4282,12 @@ static int nft_set_desc_concat(struct nf
+ return err;
+ }
+
++ for (i = 0; i < desc->field_count; i++)
++ num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
++
++ if (num_regs > NFT_REG32_COUNT)
++ return -E2BIG;
++
+ return 0;
+ }
+
--- /dev/null
+From 558254b0b602b8605d7246a10cfeb584b1fcabfc Mon Sep 17 00:00:00 2001
+From: Phil Sutter <phil@nwl.cc>
+Date: Tue, 24 May 2022 14:50:01 +0200
+Subject: netfilter: nft_limit: Clone packet limits' cost value
+
+From: Phil Sutter <phil@nwl.cc>
+
+commit 558254b0b602b8605d7246a10cfeb584b1fcabfc upstream.
+
+When cloning a packet-based limit expression, copy the cost value as
+well. Otherwise the new limit is not functional anymore.
+
+Fixes: 3b9e2ea6c11bf ("netfilter: nft_limit: move stateful fields out of expression data")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_limit.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/netfilter/nft_limit.c
++++ b/net/netfilter/nft_limit.c
+@@ -213,6 +213,8 @@ static int nft_limit_pkts_clone(struct n
+ struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
+ struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
+
++ priv_dst->cost = priv_src->cost;
++
+ return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+ }
+
assoc_array-fix-bug_on-during-garbage-collect.patch
pipe-make-poll_usage-boolean-and-annotate-its-access.patch
pipe-fix-missing-lock-in-pipe_resize_ring.patch
+net-ipa-compute-proper-aggregation-limit.patch
+drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch
+exfat-check-if-cluster-num-is-valid.patch
+exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch
+netfilter-nft_limit-clone-packet-limits-cost-value.patch
+netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch
+netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch
+netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch
+netfilter-conntrack-re-fetch-conntrack-after-insertion.patch
+kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch
+x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch
+x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch
+x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch
+x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch
+kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch
+kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch
+kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch
+kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch
+kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch
+kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch
+kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch
+kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch
+kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch
--- /dev/null
+From d187ba5312307d51818beafaad87d28a7d939adf Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 4 May 2022 00:12:19 +0000
+Subject: x86/fpu: KVM: Set the base guest FPU uABI size to sizeof(struct kvm_xsave)
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d187ba5312307d51818beafaad87d28a7d939adf upstream.
+
+Set the starting uABI size of KVM's guest FPU to 'struct kvm_xsave',
+i.e. to KVM's historical uABI size. When saving FPU state for userspace,
+KVM (well, now the FPU) sets the FP+SSE bits in the XSAVE header even if
+the host doesn't support XSAVE. Setting the XSAVE header allows the VM
+to be migrated to a host that does support XSAVE without the new host
+having to handle FPU state that may or may not be compatible with XSAVE.
+
+Setting the uABI size to the host's default size results in out-of-bounds
+writes (setting the FP+SSE bits) and data corruption (that is thankfully
+caught by KASAN) when running on hosts without XSAVE, e.g. on Core2 CPUs.
+
+WARN if the default size is larger than KVM's historical uABI size; all
+features that can push the FPU size beyond the historical size must be
+opt-in.
+
+ ==================================================================
+ BUG: KASAN: slab-out-of-bounds in fpu_copy_uabi_to_guest_fpstate+0x86/0x130
+ Read of size 8 at addr ffff888011e33a00 by task qemu-build/681
+ CPU: 1 PID: 681 Comm: qemu-build Not tainted 5.18.0-rc5-KASAN-amd64 #1
+ Hardware name: /DG35EC, BIOS ECG3510M.86A.0118.2010.0113.1426 01/13/2010
+ Call Trace:
+ <TASK>
+ dump_stack_lvl+0x34/0x45
+ print_report.cold+0x45/0x575
+ kasan_report+0x9b/0xd0
+ fpu_copy_uabi_to_guest_fpstate+0x86/0x130
+ kvm_arch_vcpu_ioctl+0x72a/0x1c50 [kvm]
+ kvm_vcpu_ioctl+0x47f/0x7b0 [kvm]
+ __x64_sys_ioctl+0x5de/0xc90
+ do_syscall_64+0x31/0x50
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+ </TASK>
+ Allocated by task 0:
+ (stack is not available)
+ The buggy address belongs to the object at ffff888011e33800
+ which belongs to the cache kmalloc-512 of size 512
+ The buggy address is located 0 bytes to the right of
+ 512-byte region [ffff888011e33800, ffff888011e33a00)
+ The buggy address belongs to the physical page:
+ page:0000000089cd4adb refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11e30
+ head:0000000089cd4adb order:2 compound_mapcount:0 compound_pincount:0
+ flags: 0x4000000000010200(slab|head|zone=1)
+ raw: 4000000000010200 dead000000000100 dead000000000122 ffff888001041c80
+ raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
+ page dumped because: kasan: bad access detected
+ Memory state around the buggy address:
+ ffff888011e33900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ffff888011e33980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ >ffff888011e33a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ^
+ ffff888011e33a80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff888011e33b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ==================================================================
+ Disabling lock debugging due to kernel taint
+
+Fixes: be50b2065dfa ("kvm: x86: Add support for getting/setting expanded xstate buffer")
+Fixes: c60427dd50ba ("x86/fpu: Add uabi_size to guest_fpu")
+Reported-by: Zdenek Kaspar <zkaspar82@gmail.com>
+Cc: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: kvm@vger.kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Tested-by: Zdenek Kaspar <zkaspar82@gmail.com>
+Message-Id: <20220504001219.983513-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/core.c | 17 ++++++++++++++++-
+ 1 file changed, 16 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/fpu/core.c
++++ b/arch/x86/kernel/fpu/core.c
+@@ -14,6 +14,8 @@
+ #include <asm/traps.h>
+ #include <asm/irq_regs.h>
+
++#include <uapi/asm/kvm.h>
++
+ #include <linux/hardirq.h>
+ #include <linux/pkeys.h>
+ #include <linux/vmalloc.h>
+@@ -232,7 +234,20 @@ bool fpu_alloc_guest_fpstate(struct fpu_
+ gfpu->fpstate = fpstate;
+ gfpu->xfeatures = fpu_user_cfg.default_features;
+ gfpu->perm = fpu_user_cfg.default_features;
+- gfpu->uabi_size = fpu_user_cfg.default_size;
++
++ /*
++ * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state
++ * to userspace, even when XSAVE is unsupported, so that restoring FPU
++ * state on a different CPU that does support XSAVE can cleanly load
++ * the incoming state using its natural XSAVE. In other words, KVM's
++ * uABI size may be larger than this host's default size. Conversely,
++ * the default size should never be larger than KVM's base uABI size;
++ * all features that can expand the uABI size must be opt-in.
++ */
++ gfpu->uabi_size = sizeof(struct kvm_xsave);
++ if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size))
++ gfpu->uabi_size = fpu_user_cfg.default_size;
++
+ fpu_init_guest_permissions(gfpu);
+
+ return true;
--- /dev/null
+From 0547758a6de3cc71a0cfdd031a3621a30db6a68b Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 19 May 2022 07:57:11 -0700
+Subject: x86/kvm: Alloc dummy async #PF token outside of raw spinlock
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 0547758a6de3cc71a0cfdd031a3621a30db6a68b upstream.
+
+Drop the raw spinlock in kvm_async_pf_task_wake() before allocating the
+dummy async #PF token; the allocator is preemptible on PREEMPT_RT
+kernels and must not be called from truly atomic contexts.
+
+Opportunistically document why it's ok to loop on allocation failure,
+i.e. why the function won't get stuck in an infinite loop.
+
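+For reference, a minimal userspace sketch (mine, not from the patch) of the
+same unlock-allocate-relock-recheck pattern, using a hypothetical keyed list
+protected by a pthread mutex.  The point is that once the lock has been
+dropped for the allocation, the lookup must be redone before inserting, and
+an allocation that turns out to be unneeded is simply freed:
+
+  #include <pthread.h>
+  #include <stdlib.h>
+
+  struct node { int key; struct node *next; };
+
+  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+  static struct node *head;
+
+  static struct node *find(int key)
+  {
+          for (struct node *n = head; n; n = n->next)
+                  if (n->key == key)
+                          return n;
+          return NULL;
+  }
+
+  static void insert_once(int key)
+  {
+          struct node *new = NULL;
+
+          pthread_mutex_lock(&lock);
+          while (!find(key)) {
+                  if (!new) {
+                          /* Allocate with the lock dropped; it may block. */
+                          pthread_mutex_unlock(&lock);
+                          new = calloc(1, sizeof(*new));
+                          pthread_mutex_lock(&lock);
+                          /* Recheck: another thread may have inserted it. */
+                          continue;
+                  }
+                  new->key = key;
+                  new->next = head;
+                  head = new;
+                  new = NULL;
+          }
+          pthread_mutex_unlock(&lock);
+
+          /* Allocated but lost the race (or never needed): discard. */
+          free(new);
+  }
+
+The kernel version additionally cpu_relax()es when the allocation fails and
+frees the dummy node if the wakeup beats the insertion, as the hunk below
+shows.
+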
+Reported-by: Yajun Deng <yajun.deng@linux.dev>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kvm.c | 41 +++++++++++++++++++++++++++--------------
+ 1 file changed, 27 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -191,7 +191,7 @@ void kvm_async_pf_task_wake(u32 token)
+ {
+ u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
+ struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
+- struct kvm_task_sleep_node *n;
++ struct kvm_task_sleep_node *n, *dummy = NULL;
+
+ if (token == ~0) {
+ apf_task_wake_all();
+@@ -203,28 +203,41 @@ again:
+ n = _find_apf_task(b, token);
+ if (!n) {
+ /*
+- * async PF was not yet handled.
+- * Add dummy entry for the token.
++ * Async #PF not yet handled, add a dummy entry for the token.
++ * Allocating the token must be done outside of the raw lock
++ * as the allocator is preemptible on PREEMPT_RT kernels.
+ */
+- n = kzalloc(sizeof(*n), GFP_ATOMIC);
+- if (!n) {
++ if (!dummy) {
++ raw_spin_unlock(&b->lock);
++ dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);
++
+ /*
+- * Allocation failed! Busy wait while other cpu
+- * handles async PF.
++ * Continue looping on allocation failure, eventually
++ * the async #PF will be handled and allocating a new
++ * node will be unnecessary.
++ */
++ if (!dummy)
++ cpu_relax();
++
++ /*
++ * Recheck for async #PF completion before enqueueing
++ * the dummy token to avoid duplicate list entries.
+ */
+- raw_spin_unlock(&b->lock);
+- cpu_relax();
+ goto again;
+ }
+- n->token = token;
+- n->cpu = smp_processor_id();
+- init_swait_queue_head(&n->wq);
+- hlist_add_head(&n->link, &b->list);
++ dummy->token = token;
++ dummy->cpu = smp_processor_id();
++ init_swait_queue_head(&dummy->wq);
++ hlist_add_head(&dummy->link, &b->list);
++ dummy = NULL;
+ } else {
+ apf_task_wake_one(n);
+ }
+ raw_spin_unlock(&b->lock);
+- return;
++
++ /* A dummy token might be allocated and ultimately not used. */
++ if (dummy)
++ kfree(dummy);
+ }
+ EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
+
--- /dev/null
+From baec4f5a018fe2d708fc1022330dba04b38b5fe3 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 24 May 2022 09:43:31 -0400
+Subject: x86, kvm: use correct GFP flags for preemption disabled
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit baec4f5a018fe2d708fc1022330dba04b38b5fe3 upstream.
+
+Commit ddd7ed842627 ("x86/kvm: Alloc dummy async #PF token outside of
+raw spinlock") leads to the following Smatch static checker warning:
+
+ arch/x86/kernel/kvm.c:212 kvm_async_pf_task_wake()
+ warn: sleeping in atomic context
+
+arch/x86/kernel/kvm.c
+ 202 raw_spin_lock(&b->lock);
+ 203 n = _find_apf_task(b, token);
+ 204 if (!n) {
+ 205 /*
+ 206 * Async #PF not yet handled, add a dummy entry for the token.
+ 207 * Allocating the token must be done outside of the raw lock
+ 208 * as the allocator is preemptible on PREEMPT_RT kernels.
+ 209 */
+ 210 if (!dummy) {
+ 211 raw_spin_unlock(&b->lock);
+--> 212 dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);
+ ^^^^^^^^^^
+Smatch thinks the caller has preempt disabled. The `smdb.py preempt
+kvm_async_pf_task_wake` output call tree is:
+
+sysvec_kvm_asyncpf_interrupt() <- disables preempt
+-> __sysvec_kvm_asyncpf_interrupt()
+ -> kvm_async_pf_task_wake()
+
+The caller is this:
+
+arch/x86/kernel/kvm.c
+ 290 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
+ 291 {
+ 292 struct pt_regs *old_regs = set_irq_regs(regs);
+ 293 u32 token;
+ 294
+ 295 ack_APIC_irq();
+ 296
+ 297 inc_irq_stat(irq_hv_callback_count);
+ 298
+ 299 if (__this_cpu_read(apf_reason.enabled)) {
+ 300 token = __this_cpu_read(apf_reason.token);
+ 301 kvm_async_pf_task_wake(token);
+ 302 __this_cpu_write(apf_reason.token, 0);
+ 303 wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+ 304 }
+ 305
+ 306 set_irq_regs(old_regs);
+ 307 }
+
+DEFINE_IDTENTRY_SYSVEC() is a wrapper that calls this function from
+call_on_irqstack_cond(), and it is inside call_on_irqstack_cond() that
+preemption is disabled (unless it was already disabled). The
+irq_enter/exit_rcu() functions disable/enable preemption.
+
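+A minimal kernel-style sketch (mine, not part of the patch) of the rule the
+one-liner follows: GFP_KERNEL may sleep and is therefore off limits once
+preemption is disabled, while GFP_ATOMIC never sleeps but may fail, which
+the surrounding retry loop already tolerates:
+
+  #include <linux/slab.h>
+  #include <linux/preempt.h>
+
+  static void *wake_path_alloc(size_t size)
+  {
+          void *p;
+
+          preempt_disable();              /* models the IDT entry wrapper  */
+          p = kzalloc(size, GFP_ATOMIC);  /* GFP_KERNEL would splat here   */
+          preempt_enable();
+
+          return p;                       /* may be NULL; the caller loops */
+  }
+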
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kvm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/kvm.c
++++ b/arch/x86/kernel/kvm.c
+@@ -209,7 +209,7 @@ again:
+ */
+ if (!dummy) {
+ raw_spin_unlock(&b->lock);
+- dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);
++ dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC);
+
+ /*
+ * Continue looping on allocation failure, eventually
--- /dev/null
+From 989b5db215a2f22f89d730b607b071d964780f10 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 2 Feb 2022 00:49:42 +0000
+Subject: x86/uaccess: Implement macros for CMPXCHG on user addresses
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 989b5db215a2f22f89d730b607b071d964780f10 upstream.
+
+Add support for CMPXCHG loops on userspace addresses. Provide both an
+"unsafe" version for tight loops that do their own uaccess begin/end, as
+well as a "safe" version for use cases where the CMPXCHG is not buried in
+a loop, e.g. KVM will resume the guest instead of looping when emulation
+of a guest atomic access fails the CMPXCHG.
+
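+As a sketch of the intended calling convention (mine, not part of the
+patch), a hypothetical helper that sets a bit in a user-mapped word,
+retrying the CMPXCHG until it succeeds and bailing out on a fault:
+
+  #include <linux/uaccess.h>
+  #include <linux/bits.h>
+  #include <linux/errno.h>
+
+  static int set_bit_in_user_word(unsigned long __user *uptr, unsigned int bit)
+  {
+          unsigned long old, new;
+
+          if (!user_access_begin(uptr, sizeof(*uptr)))
+                  return -EFAULT;
+
+          unsafe_get_user(old, uptr, fault);
+          do {
+                  new = old | BIT(bit);
+                  /* On failure, 'old' is refreshed with the current value. */
+          } while (!unsafe_try_cmpxchg_user(uptr, &old, new, fault));
+
+          user_access_end();
+          return 0;
+
+  fault:
+          user_access_end();
+          return -EFAULT;
+  }
+
+The non-"unsafe" __try_cmpxchg_user() form wraps the same operation in its
+own uaccess begin/end and folds a fault into a -EFAULT return.
+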
+Provide 8-byte versions for 32-bit kernels so that KVM can do CMPXCHG on
+guest PAE PTEs, which are accessed via userspace addresses.
+
+Guard the asm_volatile_goto() variation with CC_HAS_ASM_GOTO_TIED_OUTPUT,
+as the "+m" constraint fails on some compilers that otherwise support
+CC_HAS_ASM_GOTO_OUTPUT.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220202004945.2540433-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/uaccess.h | 142 +++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 142 insertions(+)
+
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -382,6 +382,103 @@ do { \
+
+ #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
++#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
++ bool success; \
++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
++ __typeof__(*(_ptr)) __old = *_old; \
++ __typeof__(*(_ptr)) __new = (_new); \
++ asm_volatile_goto("\n" \
++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
++ _ASM_EXTABLE_UA(1b, %l[label]) \
++ : CC_OUT(z) (success), \
++ [ptr] "+m" (*_ptr), \
++ [old] "+a" (__old) \
++ : [new] ltype (__new) \
++ : "memory" \
++ : label); \
++ if (unlikely(!success)) \
++ *_old = __old; \
++ likely(success); })
++
++#ifdef CONFIG_X86_32
++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
++ bool success; \
++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
++ __typeof__(*(_ptr)) __old = *_old; \
++ __typeof__(*(_ptr)) __new = (_new); \
++ asm_volatile_goto("\n" \
++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
++ _ASM_EXTABLE_UA(1b, %l[label]) \
++ : CC_OUT(z) (success), \
++ "+A" (__old), \
++ [ptr] "+m" (*_ptr) \
++ : "b" ((u32)__new), \
++ "c" ((u32)((u64)__new >> 32)) \
++ : "memory" \
++ : label); \
++ if (unlikely(!success)) \
++ *_old = __old; \
++ likely(success); })
++#endif // CONFIG_X86_32
++#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
++ int __err = 0; \
++ bool success; \
++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
++ __typeof__(*(_ptr)) __old = *_old; \
++ __typeof__(*(_ptr)) __new = (_new); \
++ asm volatile("\n" \
++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
++ CC_SET(z) \
++ "2:\n" \
++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \
++ %[errout]) \
++ : CC_OUT(z) (success), \
++ [errout] "+r" (__err), \
++ [ptr] "+m" (*_ptr), \
++ [old] "+a" (__old) \
++ : [new] ltype (__new) \
++ : "memory", "cc"); \
++ if (unlikely(__err)) \
++ goto label; \
++ if (unlikely(!success)) \
++ *_old = __old; \
++ likely(success); })
++
++#ifdef CONFIG_X86_32
++/*
++ * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error.
++ * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are
++ * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses
++ * both ESI and EDI for the memory operand, compilation will fail if the error
++ * is an input+output as there will be no register available for input.
++ */
++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
++ int __result; \
++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
++ __typeof__(*(_ptr)) __old = *_old; \
++ __typeof__(*(_ptr)) __new = (_new); \
++ asm volatile("\n" \
++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
++ "mov $0, %%ecx\n\t" \
++ "setz %%cl\n" \
++ "2:\n" \
++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \
++ : [result]"=c" (__result), \
++ "+A" (__old), \
++ [ptr] "+m" (*_ptr) \
++ : "b" ((u32)__new), \
++ "c" ((u32)((u64)__new >> 32)) \
++ : "memory", "cc"); \
++ if (unlikely(__result < 0)) \
++ goto label; \
++ if (unlikely(!__result)) \
++ *_old = __old; \
++ likely(__result); })
++#endif // CONFIG_X86_32
++#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
++
+ /* FIXME: this hack is definitely wrong -AK */
+ struct __large_struct { unsigned long buf[100]; };
+ #define __m(x) (*(struct __large_struct __user *)(x))
+@@ -474,6 +571,51 @@ do { \
+ } while (0)
+ #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
++extern void __try_cmpxchg_user_wrong_size(void);
++
++#ifndef CONFIG_X86_32
++#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \
++ __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label)
++#endif
++
++/*
++ * Force the pointer to u<size> to match the size expected by the asm helper.
++ * clang/LLVM compiles all cases and only discards the unused paths after
++ * processing errors, which breaks i386 if the pointer is an 8-byte value.
++ */
++#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \
++ bool __ret; \
++ __chk_user_ptr(_ptr); \
++ switch (sizeof(*(_ptr))) { \
++ case 1: __ret = __try_cmpxchg_user_asm("b", "q", \
++ (__force u8 *)(_ptr), (_oldp), \
++ (_nval), _label); \
++ break; \
++ case 2: __ret = __try_cmpxchg_user_asm("w", "r", \
++ (__force u16 *)(_ptr), (_oldp), \
++ (_nval), _label); \
++ break; \
++ case 4: __ret = __try_cmpxchg_user_asm("l", "r", \
++ (__force u32 *)(_ptr), (_oldp), \
++ (_nval), _label); \
++ break; \
++ case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\
++ (_nval), _label); \
++ break; \
++ default: __try_cmpxchg_user_wrong_size(); \
++ } \
++ __ret; })
++
++/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */
++#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \
++ int __ret = -EFAULT; \
++ __uaccess_begin_nospec(); \
++ __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \
++_label: \
++ __uaccess_end(); \
++ __ret; \
++ })
++
+ /*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.