From 3efe9d7ddf8f4c19e85bc008de8fa1eacd1cc9f3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Jun 2022 17:50:52 +0200 Subject: [PATCH] 5.17-stable patches added patches: drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch exfat-check-if-cluster-num-is-valid.patch exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch net-ipa-compute-proper-aggregation-limit.patch netfilter-conntrack-re-fetch-conntrack-after-insertion.patch netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch netfilter-nft_limit-clone-packet-limits-cost-value.patch x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch --- ...ing-in-call-to-intel_read_wm_latency.patch | 57 ++++++ .../exfat-check-if-cluster-num-is-valid.patch | 104 ++++++++++ ...directory-information-after-renaming.patch | 98 +++++++++ ...ncorrect-null-check-on-list-iterator.patch | 51 +++++ ...terfaces-to-prevent-kernel-data-leak.patch | 88 ++++++++ ...ulator-without-a-decoded-instruction.patch | 107 ++++++++++ ...-a-vcpu-after-.vm_destroy-was-called.patch | 63 ++++++ ...a-triple-fault-never-escapes-from-l2.patch | 83 ++++++++ ...ndling-wrongly-considered-from-guest.patch | 40 ++++ ..._cmpxchg_user-causing-non-atomicness.patch | 35 ++++ ...nced-and-no-tlb-flushing-is-required.patch | 89 ++++++++ ...xchg_user-to-emulate-atomic-accesses.patch | 103 ++++++++++ ...hg_user-to-update-guest-pte-a-d-bits.patch | 84 ++++++++ ...ipa-compute-proper-aggregation-limit.patch | 49 +++++ ...k-re-fetch-conntrack-after-insertion.patch | 43 ++++ ...le-hook-unregistration-in-netns-path.patch | 137 +++++++++++++ ...es-hold-mutex-on-netns-pre_exit-path.patch | 32 +++ ...s-sanitize-nft_set_desc_concat_parse.patch | 74 +++++++ ...limit-clone-packet-limits-cost-value.patch | 31 +++ queue-5.17/series | 23 +++ ...uabi-size-to-sizeof-struct-kvm_xsave.patch | 113 +++++++++++ ...ync-pf-token-outside-of-raw-spinlock.patch | 91 +++++++++ ...ct-gfp-flags-for-preemption-disabled.patch | 81 ++++++++ ...macros-for-cmpxchg-on-user-addresses.patch | 191 ++++++++++++++++++ 24 files changed, 1867 insertions(+) create mode 100644 queue-5.17/drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch create mode 100644 queue-5.17/exfat-check-if-cluster-num-is-valid.patch create mode 100644 queue-5.17/exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch create mode 100644 
queue-5.17/kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch create mode 100644 queue-5.17/kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch create mode 100644 queue-5.17/kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch create mode 100644 queue-5.17/kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch create mode 100644 queue-5.17/kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch create mode 100644 queue-5.17/kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch create mode 100644 queue-5.17/kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch create mode 100644 queue-5.17/kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch create mode 100644 queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch create mode 100644 queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch create mode 100644 queue-5.17/net-ipa-compute-proper-aggregation-limit.patch create mode 100644 queue-5.17/netfilter-conntrack-re-fetch-conntrack-after-insertion.patch create mode 100644 queue-5.17/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch create mode 100644 queue-5.17/netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch create mode 100644 queue-5.17/netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch create mode 100644 queue-5.17/netfilter-nft_limit-clone-packet-limits-cost-value.patch create mode 100644 queue-5.17/x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch create mode 100644 queue-5.17/x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch create mode 100644 queue-5.17/x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch create mode 100644 queue-5.17/x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch diff --git a/queue-5.17/drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch b/queue-5.17/drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch new file mode 100644 index 00000000000..80b0d00a8d7 --- /dev/null +++ b/queue-5.17/drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch @@ -0,0 +1,57 @@ +From 336feb502a715909a8136eb6a62a83d7268a353b Mon Sep 17 00:00:00 2001 +From: "Gustavo A. R. Silva" +Date: Wed, 27 Apr 2022 17:47:14 -0500 +Subject: drm/i915: Fix -Wstringop-overflow warning in call to intel_read_wm_latency() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Gustavo A. R. Silva + +commit 336feb502a715909a8136eb6a62a83d7268a353b upstream. + +Fix the following -Wstringop-overflow warnings when building with GCC-11: + +drivers/gpu/drm/i915/intel_pm.c:3106:9: warning: ‘intel_read_wm_latency’ accessing 16 bytes in a region of size 10 [-Wstringop-overflow=] + 3106 | intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +drivers/gpu/drm/i915/intel_pm.c:3106:9: note: referencing argument 2 of type ‘u16 *’ {aka ‘short unsigned int *’} +drivers/gpu/drm/i915/intel_pm.c:2861:13: note: in a call to function ‘intel_read_wm_latency’ + 2861 | static void intel_read_wm_latency(struct drm_i915_private *dev_priv, + | ^~~~~~~~~~~~~~~~~~~~~ + +by removing the over-specified array size from the argument declarations. 
+ +It seems that this code is actually safe because the size of the +array depends on the hardware generation, and the function checks +for that. + +Notice that wm can be an array of 5 elements: +drivers/gpu/drm/i915/intel_pm.c:3109: intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); + +or an array of 8 elements: +drivers/gpu/drm/i915/intel_pm.c:3131: intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency); + +and the compiler legitimately complains about that. + +This helps with the ongoing efforts to globally enable +-Wstringop-overflow. + +Link: https://github.com/KSPP/linux/issues/181 +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/intel_pm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -2876,7 +2876,7 @@ static void ilk_compute_wm_level(const s + } + + static void intel_read_wm_latency(struct drm_i915_private *dev_priv, +- u16 wm[8]) ++ u16 wm[]) + { + struct intel_uncore *uncore = &dev_priv->uncore; + diff --git a/queue-5.17/exfat-check-if-cluster-num-is-valid.patch b/queue-5.17/exfat-check-if-cluster-num-is-valid.patch new file mode 100644 index 00000000000..0ddcb541677 --- /dev/null +++ b/queue-5.17/exfat-check-if-cluster-num-is-valid.patch @@ -0,0 +1,104 @@ +From 64ba4b15e5c045f8b746c6da5fc9be9a6b00b61d Mon Sep 17 00:00:00 2001 +From: Tadeusz Struk +Date: Tue, 17 May 2022 08:13:08 +0900 +Subject: exfat: check if cluster num is valid + +From: Tadeusz Struk + +commit 64ba4b15e5c045f8b746c6da5fc9be9a6b00b61d upstream. + +Syzbot reported slab-out-of-bounds read in exfat_clear_bitmap. +This was triggered by reproducer calling truncute with size 0, +which causes the following trace: + +BUG: KASAN: slab-out-of-bounds in exfat_clear_bitmap+0x147/0x490 fs/exfat/balloc.c:174 +Read of size 8 at addr ffff888115aa9508 by task syz-executor251/365 + +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack_lvl+0x1e2/0x24b lib/dump_stack.c:118 + print_address_description+0x81/0x3c0 mm/kasan/report.c:233 + __kasan_report mm/kasan/report.c:419 [inline] + kasan_report+0x1a4/0x1f0 mm/kasan/report.c:436 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report_generic.c:309 + exfat_clear_bitmap+0x147/0x490 fs/exfat/balloc.c:174 + exfat_free_cluster+0x25a/0x4a0 fs/exfat/fatent.c:181 + __exfat_truncate+0x99e/0xe00 fs/exfat/file.c:217 + exfat_truncate+0x11b/0x4f0 fs/exfat/file.c:243 + exfat_setattr+0xa03/0xd40 fs/exfat/file.c:339 + notify_change+0xb76/0xe10 fs/attr.c:336 + do_truncate+0x1ea/0x2d0 fs/open.c:65 + +Move the is_valid_cluster() helper from fatent.c to a common +header to make it reusable in other *.c files. And add is_valid_cluster() +to validate if cluster number is within valid range in exfat_clear_bitmap() +and exfat_set_bitmap(). 
+ +Link: https://syzkaller.appspot.com/bug?id=50381fc73821ecae743b8cf24b4c9a04776f767c +Reported-by: syzbot+a4087e40b9c13aad7892@syzkaller.appspotmail.com +Fixes: 1e49a94cf707 ("exfat: add bitmap operations") +Cc: stable@vger.kernel.org # v5.7+ +Signed-off-by: Tadeusz Struk +Reviewed-by: Sungjong Seo +Signed-off-by: Namjae Jeon +Signed-off-by: Greg Kroah-Hartman +--- + fs/exfat/balloc.c | 8 ++++++-- + fs/exfat/exfat_fs.h | 6 ++++++ + fs/exfat/fatent.c | 6 ------ + 3 files changed, 12 insertions(+), 8 deletions(-) + +--- a/fs/exfat/balloc.c ++++ b/fs/exfat/balloc.c +@@ -148,7 +148,9 @@ int exfat_set_bitmap(struct inode *inode + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + +- WARN_ON(clu < EXFAT_FIRST_CLUSTER); ++ if (!is_valid_cluster(sbi, clu)) ++ return -EINVAL; ++ + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); +@@ -166,7 +168,9 @@ void exfat_clear_bitmap(struct inode *in + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_mount_options *opts = &sbi->options; + +- WARN_ON(clu < EXFAT_FIRST_CLUSTER); ++ if (!is_valid_cluster(sbi, clu)) ++ return; ++ + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); +--- a/fs/exfat/exfat_fs.h ++++ b/fs/exfat/exfat_fs.h +@@ -380,6 +380,12 @@ static inline int exfat_sector_to_cluste + EXFAT_RESERVED_CLUSTERS; + } + ++static inline bool is_valid_cluster(struct exfat_sb_info *sbi, ++ unsigned int clus) ++{ ++ return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters; ++} ++ + /* super.c */ + int exfat_set_volume_dirty(struct super_block *sb); + int exfat_clear_volume_dirty(struct super_block *sb); +--- a/fs/exfat/fatent.c ++++ b/fs/exfat/fatent.c +@@ -81,12 +81,6 @@ int exfat_ent_set(struct super_block *sb + return 0; + } + +-static inline bool is_valid_cluster(struct exfat_sb_info *sbi, +- unsigned int clus) +-{ +- return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters; +-} +- + int exfat_ent_get(struct super_block *sb, unsigned int loc, + unsigned int *content) + { diff --git a/queue-5.17/exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch b/queue-5.17/exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch new file mode 100644 index 00000000000..a8275598121 --- /dev/null +++ b/queue-5.17/exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch @@ -0,0 +1,98 @@ +From d8dad2588addd1d861ce19e7df3b702330f0c7e3 Mon Sep 17 00:00:00 2001 +From: Yuezhang Mo +Date: Mon, 4 Apr 2022 11:58:06 +0900 +Subject: exfat: fix referencing wrong parent directory information after renaming + +From: Yuezhang Mo + +commit d8dad2588addd1d861ce19e7df3b702330f0c7e3 upstream. + +During renaming, the parent directory information maybe +updated. But the file/directory still references to the +old parent directory information. + +This bug will cause 2 problems. + +(1) The renamed file can not be written. + + [10768.175172] exFAT-fs (sda1): error, failed to bmap (inode : 7afd50e4 iblock : 0, err : -5) + [10768.184285] exFAT-fs (sda1): Filesystem has been set read-only + ash: write error: Input/output error + +(2) Some dentries of the renamed file/directory are not set + to deleted after removing the file/directory. + +exfat_update_parent_info() is a workaround for the wrong parent +directory information being used after renaming. Now that bug is +fixed, this is no longer needed, so remove it. 
+ +Fixes: 5f2aa075070c ("exfat: add inode operations") +Cc: stable@vger.kernel.org # v5.7+ +Signed-off-by: Yuezhang Mo +Reviewed-by: Andy Wu +Reviewed-by: Aoyama Wataru +Reviewed-by: Daniel Palmer +Reviewed-by: Sungjong Seo +Signed-off-by: Namjae Jeon +Signed-off-by: Greg Kroah-Hartman +--- + fs/exfat/namei.c | 27 +-------------------------- + 1 file changed, 1 insertion(+), 26 deletions(-) + +--- a/fs/exfat/namei.c ++++ b/fs/exfat/namei.c +@@ -1062,6 +1062,7 @@ static int exfat_rename_file(struct inod + + exfat_remove_entries(inode, p_dir, oldentry, 0, + num_old_entries); ++ ei->dir = *p_dir; + ei->entry = newentry; + } else { + if (exfat_get_entry_type(epold) == TYPE_FILE) { +@@ -1149,28 +1150,6 @@ static int exfat_move_file(struct inode + return 0; + } + +-static void exfat_update_parent_info(struct exfat_inode_info *ei, +- struct inode *parent_inode) +-{ +- struct exfat_sb_info *sbi = EXFAT_SB(parent_inode->i_sb); +- struct exfat_inode_info *parent_ei = EXFAT_I(parent_inode); +- loff_t parent_isize = i_size_read(parent_inode); +- +- /* +- * the problem that struct exfat_inode_info caches wrong parent info. +- * +- * because of flag-mismatch of ei->dir, +- * there is abnormal traversing cluster chain. +- */ +- if (unlikely(parent_ei->flags != ei->dir.flags || +- parent_isize != EXFAT_CLU_TO_B(ei->dir.size, sbi) || +- parent_ei->start_clu != ei->dir.dir)) { +- exfat_chain_set(&ei->dir, parent_ei->start_clu, +- EXFAT_B_TO_CLU_ROUND_UP(parent_isize, sbi), +- parent_ei->flags); +- } +-} +- + /* rename or move a old file into a new file */ + static int __exfat_rename(struct inode *old_parent_inode, + struct exfat_inode_info *ei, struct inode *new_parent_inode, +@@ -1201,8 +1180,6 @@ static int __exfat_rename(struct inode * + return -ENOENT; + } + +- exfat_update_parent_info(ei, old_parent_inode); +- + exfat_chain_dup(&olddir, &ei->dir); + dentry = ei->entry; + +@@ -1223,8 +1200,6 @@ static int __exfat_rename(struct inode * + goto out; + } + +- exfat_update_parent_info(new_ei, new_parent_inode); +- + p_dir = &(new_ei->dir); + new_entry = new_ei->entry; + ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh); diff --git a/queue-5.17/kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch b/queue-5.17/kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch new file mode 100644 index 00000000000..d9e102dea1b --- /dev/null +++ b/queue-5.17/kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch @@ -0,0 +1,51 @@ +From 300981abddcb13f8f06ad58f52358b53a8096775 Mon Sep 17 00:00:00 2001 +From: Xiaomeng Tong +Date: Thu, 14 Apr 2022 14:21:03 +0800 +Subject: KVM: PPC: Book3S HV: fix incorrect NULL check on list iterator + +From: Xiaomeng Tong + +commit 300981abddcb13f8f06ad58f52358b53a8096775 upstream. + +The bug is here: + if (!p) + return ret; + +The list iterator value 'p' will *always* be set and non-NULL by +list_for_each_entry(), so it is incorrect to assume that the iterator +value will be NULL if the list is empty or no element is found. + +To fix the bug, Use a new value 'iter' as the list iterator, while use +the old value 'p' as a dedicated variable to point to the found element. 
+ +Fixes: dfaa973ae960 ("KVM: PPC: Book3S HV: In H_SVM_INIT_DONE, migrate remaining normal-GFNs to secure-GFNs") +Cc: stable@vger.kernel.org # v5.9+ +Signed-off-by: Xiaomeng Tong +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20220414062103.8153-1-xiam0nd.tong@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/kvm/book3s_hv_uvmem.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/arch/powerpc/kvm/book3s_hv_uvmem.c ++++ b/arch/powerpc/kvm/book3s_hv_uvmem.c +@@ -360,13 +360,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsi + static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, + struct kvm *kvm, unsigned long *gfn) + { +- struct kvmppc_uvmem_slot *p; ++ struct kvmppc_uvmem_slot *p = NULL, *iter; + bool ret = false; + unsigned long i; + +- list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) +- if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) ++ list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list) ++ if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) { ++ p = iter; + break; ++ } + if (!p) + return ret; + /* diff --git a/queue-5.17/kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch b/queue-5.17/kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch new file mode 100644 index 00000000000..85276ddaebe --- /dev/null +++ b/queue-5.17/kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch @@ -0,0 +1,88 @@ +From d22d2474e3953996f03528b84b7f52cc26a39403 Mon Sep 17 00:00:00 2001 +From: Ashish Kalra +Date: Mon, 16 May 2022 15:43:10 +0000 +Subject: KVM: SVM: Use kzalloc for sev ioctl interfaces to prevent kernel data leak + +From: Ashish Kalra + +commit d22d2474e3953996f03528b84b7f52cc26a39403 upstream. + +For some sev ioctl interfaces, the length parameter that is passed maybe +less than or equal to SEV_FW_BLOB_MAX_SIZE, but larger than the data +that PSP firmware returns. In this case, kmalloc will allocate memory +that is the size of the input rather than the size of the data. +Since PSP firmware doesn't fully overwrite the allocated buffer, these +sev ioctl interface may return uninitialized kernel slab memory. 
+ +Reported-by: Andy Nguyen +Suggested-by: David Rientjes +Suggested-by: Peter Gonda +Cc: kvm@vger.kernel.org +Cc: stable@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Fixes: eaf78265a4ab3 ("KVM: SVM: Move SEV code to separate file") +Fixes: 2c07ded06427d ("KVM: SVM: add support for SEV attestation command") +Fixes: 4cfdd47d6d95a ("KVM: SVM: Add KVM_SEV SEND_START command") +Fixes: d3d1af85e2c75 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command") +Fixes: eba04b20e4861 ("KVM: x86: Account a variety of miscellaneous allocations") +Signed-off-by: Ashish Kalra +Reviewed-by: Peter Gonda +Message-Id: <20220516154310.3685678-1-Ashish.Kalra@amd.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/sev.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/svm/sev.c ++++ b/arch/x86/kvm/svm/sev.c +@@ -684,7 +684,7 @@ static int sev_launch_measure(struct kvm + if (params.len > SEV_FW_BLOB_MAX_SIZE) + return -EINVAL; + +- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT); ++ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); + if (!blob) + return -ENOMEM; + +@@ -804,7 +804,7 @@ static int __sev_dbg_decrypt_user(struct + if (!IS_ALIGNED(dst_paddr, 16) || + !IS_ALIGNED(paddr, 16) || + !IS_ALIGNED(size, 16)) { +- tpage = (void *)alloc_page(GFP_KERNEL); ++ tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!tpage) + return -ENOMEM; + +@@ -1090,7 +1090,7 @@ static int sev_get_attestation_report(st + if (params.len > SEV_FW_BLOB_MAX_SIZE) + return -EINVAL; + +- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT); ++ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); + if (!blob) + return -ENOMEM; + +@@ -1172,7 +1172,7 @@ static int sev_send_start(struct kvm *kv + return -EINVAL; + + /* allocate the memory to hold the session data blob */ +- session_data = kmalloc(params.session_len, GFP_KERNEL_ACCOUNT); ++ session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT); + if (!session_data) + return -ENOMEM; + +@@ -1296,11 +1296,11 @@ static int sev_send_update_data(struct k + + /* allocate memory for header and transport buffer */ + ret = -ENOMEM; +- hdr = kmalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); ++ hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); + if (!hdr) + goto e_unpin; + +- trans_data = kmalloc(params.trans_len, GFP_KERNEL_ACCOUNT); ++ trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT); + if (!trans_data) + goto e_free_hdr; + diff --git a/queue-5.17/kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch b/queue-5.17/kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch new file mode 100644 index 00000000000..094b7de1004 --- /dev/null +++ b/queue-5.17/kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch @@ -0,0 +1,107 @@ +From fee060cd52d69c114b62d1a2948ea9648b5131f9 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 11 Mar 2022 03:27:41 +0000 +Subject: KVM: x86: avoid calling x86 emulator without a decoded instruction + +From: Sean Christopherson + +commit fee060cd52d69c114b62d1a2948ea9648b5131f9 upstream. + +Whenever x86_decode_emulated_instruction() detects a breakpoint, it +returns the value that kvm_vcpu_check_breakpoint() writes into its +pass-by-reference second argument. Unfortunately this is completely +bogus because the expected outcome of x86_decode_emulated_instruction +is an EMULATION_* value. 
+ +Then, if kvm_vcpu_check_breakpoint() does "*r = 0" (corresponding to +a KVM_EXIT_DEBUG userspace exit), it is misunderstood as EMULATION_OK +and x86_emulate_instruction() is called without having decoded the +instruction. This causes various havoc from running with a stale +emulation context. + +The fix is to move the call to kvm_vcpu_check_breakpoint() where it was +before commit 4aa2691dcbd3 ("KVM: x86: Factor out x86 instruction +emulation with decoding") introduced x86_decode_emulated_instruction(). +The other caller of the function does not need breakpoint checks, +because it is invoked as part of a vmexit and the processor has already +checked those before executing the instruction that #GP'd. + +This fixes CVE-2022-1852. + +Reported-by: Qiuhao Li +Reported-by: Gaoning Pan +Reported-by: Yongkang Jia +Fixes: 4aa2691dcbd3 ("KVM: x86: Factor out x86 instruction emulation with decoding") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220311032801.3467418-2-seanjc@google.com> +[Rewrote commit message according to Qiuhao's report, since a patch + already existed to fix the bug. - Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 31 +++++++++++++++++++------------ + 1 file changed, 19 insertions(+), 12 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -8169,7 +8169,7 @@ int kvm_skip_emulated_instruction(struct + } + EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); + +-static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) ++static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r) + { + if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && + (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { +@@ -8238,25 +8238,23 @@ static bool is_vmware_backdoor_opcode(st + } + + /* +- * Decode to be emulated instruction. Return EMULATION_OK if success. ++ * Decode an instruction for emulation. The caller is responsible for handling ++ * code breakpoints. Note, manually detecting code breakpoints is unnecessary ++ * (and wrong) when emulating on an intercepted fault-like exception[*], as ++ * code breakpoints have higher priority and thus have already been done by ++ * hardware. ++ * ++ * [*] Except #MC, which is higher priority, but KVM should never emulate in ++ * response to a machine check. + */ + int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, + void *insn, int insn_len) + { +- int r = EMULATION_OK; + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; ++ int r; + + init_emulate_ctxt(vcpu); + +- /* +- * We will reenter on the same instruction since we do not set +- * complete_userspace_io. This does not handle watchpoints yet, +- * those would be handled in the emulate_ops. +- */ +- if (!(emulation_type & EMULTYPE_SKIP) && +- kvm_vcpu_check_breakpoint(vcpu, &r)) +- return r; +- + r = x86_decode_insn(ctxt, insn, insn_len, emulation_type); + + trace_kvm_emulate_insn_start(vcpu); +@@ -8289,6 +8287,15 @@ int x86_emulate_instruction(struct kvm_v + if (!(emulation_type & EMULTYPE_NO_DECODE)) { + kvm_clear_exception_queue(vcpu); + ++ /* ++ * Return immediately if RIP hits a code breakpoint, such #DBs ++ * are fault-like and are higher priority than any faults on ++ * the code fetch itself. 
++ */ ++ if (!(emulation_type & EMULTYPE_SKIP) && ++ kvm_vcpu_check_code_breakpoint(vcpu, &r)) ++ return r; ++ + r = x86_decode_emulated_instruction(vcpu, emulation_type, + insn, insn_len); + if (r != EMULATION_OK) { diff --git a/queue-5.17/kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch b/queue-5.17/kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch new file mode 100644 index 00000000000..64a18b470b3 --- /dev/null +++ b/queue-5.17/kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch @@ -0,0 +1,63 @@ +From 6fcee03df6a1a3101a77344be37bb85c6142d56c Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 22 Mar 2022 19:24:42 +0200 +Subject: KVM: x86: avoid loading a vCPU after .vm_destroy was called + +From: Maxim Levitsky + +commit 6fcee03df6a1a3101a77344be37bb85c6142d56c upstream. + +This can cause various unexpected issues, since VM is partially +destroyed at that point. + +For example when AVIC is enabled, this causes avic_vcpu_load to +access physical id page entry which is already freed by .vm_destroy. + +Fixes: 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC") +Cc: stable@vger.kernel.org +Signed-off-by: Maxim Levitsky +Message-Id: <20220322172449.235575-2-mlevitsk@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -11655,20 +11655,15 @@ static void kvm_unload_vcpu_mmu(struct k + vcpu_put(vcpu); + } + +-static void kvm_free_vcpus(struct kvm *kvm) ++static void kvm_unload_vcpu_mmus(struct kvm *kvm) + { + unsigned long i; + struct kvm_vcpu *vcpu; + +- /* +- * Unpin any mmu pages first. +- */ + kvm_for_each_vcpu(i, vcpu, kvm) { + kvm_clear_async_pf_completion_queue(vcpu); + kvm_unload_vcpu_mmu(vcpu); + } +- +- kvm_destroy_vcpus(kvm); + } + + void kvm_arch_sync_events(struct kvm *kvm) +@@ -11774,11 +11769,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm + __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); + mutex_unlock(&kvm->slots_lock); + } ++ kvm_unload_vcpu_mmus(kvm); + static_call_cond(kvm_x86_vm_destroy)(kvm); + kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); + kvm_pic_destroy(kvm); + kvm_ioapic_destroy(kvm); +- kvm_free_vcpus(kvm); ++ kvm_destroy_vcpus(kvm); + kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); + kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); + kvm_mmu_uninit_vm(kvm); diff --git a/queue-5.17/kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch b/queue-5.17/kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch new file mode 100644 index 00000000000..e4555dc6a64 --- /dev/null +++ b/queue-5.17/kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch @@ -0,0 +1,83 @@ +From 45846661d10422ce9e22da21f8277540b29eca22 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 7 Apr 2022 00:23:13 +0000 +Subject: KVM: x86: Drop WARNs that assert a triple fault never "escapes" from L2 + +From: Sean Christopherson + +commit 45846661d10422ce9e22da21f8277540b29eca22 upstream. + +Remove WARNs that sanity check that KVM never lets a triple fault for L2 +escape and incorrectly end up in L1. 
In normal operation, the sanity +check is perfectly valid, but it incorrectly assumes that it's impossible +for userspace to induce KVM_REQ_TRIPLE_FAULT without bouncing through +KVM_RUN (which guarantees kvm_check_nested_state() will see and handle +the triple fault). + +The WARN can currently be triggered if userspace injects a machine check +while L2 is active and CR4.MCE=0. And a future fix to allow save/restore +of KVM_REQ_TRIPLE_FAULT, e.g. so that a synthesized triple fault isn't +lost on migration, will make it trivially easy for userspace to trigger +the WARN. + +Clearing KVM_REQ_TRIPLE_FAULT when forcibly leaving guest mode is +tempting, but wrong, especially if/when the request is saved/restored, +e.g. if userspace restores events (including a triple fault) and then +restores nested state (which may forcibly leave guest mode). Ignoring +the fact that KVM doesn't currently provide the necessary APIs, it's +userspace's responsibility to manage pending events during save/restore. + + ------------[ cut here ]------------ + WARNING: CPU: 7 PID: 1399 at arch/x86/kvm/vmx/nested.c:4522 nested_vmx_vmexit+0x7fe/0xd90 [kvm_intel] + Modules linked in: kvm_intel kvm irqbypass + CPU: 7 PID: 1399 Comm: state_test Not tainted 5.17.0-rc3+ #808 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 + RIP: 0010:nested_vmx_vmexit+0x7fe/0xd90 [kvm_intel] + Call Trace: + + vmx_leave_nested+0x30/0x40 [kvm_intel] + vmx_set_nested_state+0xca/0x3e0 [kvm_intel] + kvm_arch_vcpu_ioctl+0xf49/0x13e0 [kvm] + kvm_vcpu_ioctl+0x4b9/0x660 [kvm] + __x64_sys_ioctl+0x83/0xb0 + do_syscall_64+0x3b/0xc0 + entry_SYSCALL_64_after_hwframe+0x44/0xae + + ---[ end trace 0000000000000000 ]--- + +Fixes: cb6a32c2b877 ("KVM: x86: Handle triple fault in L2 without killing L1") +Cc: stable@vger.kernel.org +Cc: Chenyi Qiang +Signed-off-by: Sean Christopherson +Message-Id: <20220407002315.78092-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 3 --- + arch/x86/kvm/vmx/nested.c | 3 --- + 2 files changed, 6 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -790,9 +790,6 @@ int nested_svm_vmexit(struct vcpu_svm *s + struct kvm_host_map map; + int rc; + +- /* Triple faults in L2 should never escape. */ +- WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); +- + rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map); + if (rc) { + if (rc == -EINVAL) +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4518,9 +4518,6 @@ void nested_vmx_vmexit(struct kvm_vcpu * + /* trying to cancel vmlaunch/vmresume is a bug */ + WARN_ON_ONCE(vmx->nested.nested_run_pending); + +- /* Similarly, triple faults in L2 should never escape. 
*/ +- WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); +- + if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { + /* + * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map diff --git a/queue-5.17/kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch b/queue-5.17/kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch new file mode 100644 index 00000000000..55a211dedf8 --- /dev/null +++ b/queue-5.17/kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch @@ -0,0 +1,40 @@ +From ffd1925a596ce68bed7d81c61cb64bc35f788a9d Mon Sep 17 00:00:00 2001 +From: Yanfei Xu +Date: Mon, 23 May 2022 22:08:21 +0800 +Subject: KVM: x86: Fix the intel_pt PMI handling wrongly considered from guest + +From: Yanfei Xu + +commit ffd1925a596ce68bed7d81c61cb64bc35f788a9d upstream. + +When kernel handles the vm-exit caused by external interrupts and NMI, +it always sets kvm_intr_type to tell if it's dealing an IRQ or NMI. For +the PMI scenario, it could be IRQ or NMI. + +However, intel_pt PMIs are only generated for HARDWARE perf events, and +HARDWARE events are always configured to generate NMIs. Use +kvm_handling_nmi_from_guest() to precisely identify if the intel_pt PMI +came from the guest; this avoids false positives if an intel_pt PMI/NMI +arrives while the host is handling an unrelated IRQ VM-Exit. + +Fixes: db215756ae59 ("KVM: x86: More precisely identify NMI from guest when handling PMI") +Signed-off-by: Yanfei Xu +Message-Id: <20220523140821.1345605-1-yanfei.xu@intel.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -7858,7 +7858,7 @@ static unsigned int vmx_handle_intel_pt_ + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + + /* '0' on failure so that the !PT case can use a RET0 static call. */ +- if (!kvm_arch_pmi_in_guest(vcpu)) ++ if (!vcpu || !kvm_handling_nmi_from_guest(vcpu)) + return 0; + + kvm_make_request(KVM_REQ_PMI, vcpu); diff --git a/queue-5.17/kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch b/queue-5.17/kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch new file mode 100644 index 00000000000..39736c539ed --- /dev/null +++ b/queue-5.17/kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch @@ -0,0 +1,35 @@ +From 33fbe6befa622c082f7d417896832856814bdde0 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 12 May 2022 13:14:20 +0300 +Subject: KVM: x86: fix typo in __try_cmpxchg_user causing non-atomicness + +From: Maxim Levitsky + +commit 33fbe6befa622c082f7d417896832856814bdde0 upstream. + +This shows up as a TDP MMU leak when running nested. Non-working cmpxchg on L0 +relies makes L1 install two different shadow pages under same spte, and one of +them is leaked. 
+ +Fixes: 1c2361f667f36 ("KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses") +Signed-off-by: Maxim Levitsky +Message-Id: <20220512101420.306759-1-mlevitsk@redhat.com> +Reviewed-by: Sean Christopherson +Reviewed-by: Vitaly Kuznetsov +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -7207,7 +7207,7 @@ static int emulator_cmpxchg_emulated(str + goto emul_write; + + hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa)); +- if (kvm_is_error_hva(addr)) ++ if (kvm_is_error_hva(hva)) + goto emul_write; + + hva += offset_in_page(gpa); diff --git a/queue-5.17/kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch b/queue-5.17/kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch new file mode 100644 index 00000000000..1e3ee897a44 --- /dev/null +++ b/queue-5.17/kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch @@ -0,0 +1,89 @@ +From 8d5678a76689acbf91245a3791fe853ab773090f Mon Sep 17 00:00:00 2001 +From: Hou Wenlong +Date: Tue, 15 Mar 2022 17:35:13 +0800 +Subject: KVM: x86/mmu: Don't rebuild page when the page is synced and no tlb flushing is required + +From: Hou Wenlong + +commit 8d5678a76689acbf91245a3791fe853ab773090f upstream. + +Before Commit c3e5e415bc1e6 ("KVM: X86: Change kvm_sync_page() +to return true when remote flush is needed"), the return value +of kvm_sync_page() indicates whether the page is synced, and +kvm_mmu_get_page() would rebuild page when the sync fails. +But now, kvm_sync_page() returns false when the page is +synced and no tlb flushing is required, which leads to +rebuild page in kvm_mmu_get_page(). So return the return +value of mmu->sync_page() directly and check it in +kvm_mmu_get_page(). If the sync fails, the page will be +zapped and the invalid_list is not empty, so set flush as +true is accepted in mmu_sync_children(). + +Cc: stable@vger.kernel.org +Fixes: c3e5e415bc1e6 ("KVM: X86: Change kvm_sync_page() to return true when remote flush is needed") +Signed-off-by: Hou Wenlong +Acked-by: Lai Jiangshan +Message-Id: <0dabeeb789f57b0d793f85d073893063e692032d.1647336064.git.houwenlong.hwl@antgroup.com> +[mmu_sync_children should not flush if the page is zapped. 
- Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -1894,17 +1894,14 @@ static void kvm_mmu_commit_zap_page(stru + &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \ + if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else + +-static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, ++static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + struct list_head *invalid_list) + { + int ret = vcpu->arch.mmu->sync_page(vcpu, sp); + +- if (ret < 0) { ++ if (ret < 0) + kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); +- return false; +- } +- +- return !!ret; ++ return ret; + } + + static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, +@@ -2033,7 +2030,7 @@ static int mmu_sync_children(struct kvm_ + + for_each_sp(pages, sp, parents, i) { + kvm_unlink_unsync_page(vcpu->kvm, sp); +- flush |= kvm_sync_page(vcpu, sp, &invalid_list); ++ flush |= kvm_sync_page(vcpu, sp, &invalid_list) > 0; + mmu_pages_clear_parents(&parents); + } + if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) { +@@ -2074,6 +2071,7 @@ static struct kvm_mmu_page *kvm_mmu_get_ + struct hlist_head *sp_list; + unsigned quadrant; + struct kvm_mmu_page *sp; ++ int ret; + int collisions = 0; + LIST_HEAD(invalid_list); + +@@ -2126,11 +2124,13 @@ static struct kvm_mmu_page *kvm_mmu_get_ + * If the sync fails, the page is zapped. If so, break + * in order to rebuild it. + */ +- if (!kvm_sync_page(vcpu, sp, &invalid_list)) ++ ret = kvm_sync_page(vcpu, sp, &invalid_list); ++ if (ret < 0) + break; + + WARN_ON(!list_empty(&invalid_list)); +- kvm_flush_remote_tlbs(vcpu->kvm); ++ if (ret > 0) ++ kvm_flush_remote_tlbs(vcpu->kvm); + } + + __clear_sp_write_flooding_count(sp); diff --git a/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch b/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch new file mode 100644 index 00000000000..3b598209d10 --- /dev/null +++ b/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch @@ -0,0 +1,103 @@ +From 1c2361f667f3648855ceae25f1332c18413fdb9f Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 2 Feb 2022 00:49:44 +0000 +Subject: KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses + +From: Sean Christopherson + +commit 1c2361f667f3648855ceae25f1332c18413fdb9f upstream. + +Use the recently introduce __try_cmpxchg_user() to emulate atomic guest +accesses via the associated userspace address instead of mapping the +backing pfn into kernel address space. Using kvm_vcpu_map() is unsafe as +it does not coordinate with KVM's mmu_notifier to ensure the hva=>pfn +translation isn't changed/unmapped in the memremap() path, i.e. when +there's no struct page and thus no elevated refcount. 
+ +Fixes: 42e35f8072c3 ("KVM/X86: Use kvm_vcpu_map in emulator_cmpxchg_emulated") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220202004945.2540433-5-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 35 ++++++++++++++--------------------- + 1 file changed, 14 insertions(+), 21 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -7168,15 +7168,8 @@ static int emulator_write_emulated(struc + exception, &write_emultor); + } + +-#define CMPXCHG_TYPE(t, ptr, old, new) \ +- (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) +- +-#ifdef CONFIG_X86_64 +-# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) +-#else +-# define CMPXCHG64(ptr, old, new) \ +- (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) +-#endif ++#define emulator_try_cmpxchg_user(t, ptr, old, new) \ ++ (__try_cmpxchg_user((t __user *)(ptr), (t *)(old), *(t *)(new), efault ## t)) + + static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, +@@ -7185,12 +7178,11 @@ static int emulator_cmpxchg_emulated(str + unsigned int bytes, + struct x86_exception *exception) + { +- struct kvm_host_map map; + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + u64 page_line_mask; ++ unsigned long hva; + gpa_t gpa; +- char *kaddr; +- bool exchanged; ++ int r; + + /* guests cmpxchg8b have to be emulated atomically */ + if (bytes > 8 || (bytes & (bytes - 1))) +@@ -7214,31 +7206,32 @@ static int emulator_cmpxchg_emulated(str + if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask)) + goto emul_write; + +- if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map)) ++ hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa)); ++ if (kvm_is_error_hva(addr)) + goto emul_write; + +- kaddr = map.hva + offset_in_page(gpa); ++ hva += offset_in_page(gpa); + + switch (bytes) { + case 1: +- exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); ++ r = emulator_try_cmpxchg_user(u8, hva, old, new); + break; + case 2: +- exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); ++ r = emulator_try_cmpxchg_user(u16, hva, old, new); + break; + case 4: +- exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); ++ r = emulator_try_cmpxchg_user(u32, hva, old, new); + break; + case 8: +- exchanged = CMPXCHG64(kaddr, old, new); ++ r = emulator_try_cmpxchg_user(u64, hva, old, new); + break; + default: + BUG(); + } + +- kvm_vcpu_unmap(vcpu, &map, true); +- +- if (!exchanged) ++ if (r < 0) ++ goto emul_write; ++ if (r) + return X86EMUL_CMPXCHG_FAILED; + + kvm_page_track_write(vcpu, gpa, new, bytes); diff --git a/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch b/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch new file mode 100644 index 00000000000..5eaf367fe73 --- /dev/null +++ b/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch @@ -0,0 +1,84 @@ +From f122dfe4476890d60b8c679128cd2259ec96a24c Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 2 Feb 2022 00:49:43 +0000 +Subject: KVM: x86: Use __try_cmpxchg_user() to update guest PTE A/D bits + +From: Sean Christopherson + +commit f122dfe4476890d60b8c679128cd2259ec96a24c upstream. + +Use the recently introduced __try_cmpxchg_user() to update guest PTE A/D +bits instead of mapping the PTE into kernel address space. 
The VM_PFNMAP +path is broken as it assumes that vm_pgoff is the base pfn of the mapped +VMA range, which is conceptually wrong as vm_pgoff is the offset relative +to the file and has nothing to do with the pfn. The horrific hack worked +for the original use case (backing guest memory with /dev/mem), but leads +to accessing "random" pfns for pretty much any other VM_PFNMAP case. + +Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs") +Debugged-by: Tadeusz Struk +Tested-by: Tadeusz Struk +Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220202004945.2540433-4-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/paging_tmpl.h | 38 +------------------------------------- + 1 file changed, 1 insertion(+), 37 deletions(-) + +--- a/arch/x86/kvm/mmu/paging_tmpl.h ++++ b/arch/x86/kvm/mmu/paging_tmpl.h +@@ -144,42 +144,6 @@ static bool FNAME(is_rsvd_bits_set)(stru + FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte); + } + +-static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, +- pt_element_t __user *ptep_user, unsigned index, +- pt_element_t orig_pte, pt_element_t new_pte) +-{ +- signed char r; +- +- if (!user_access_begin(ptep_user, sizeof(pt_element_t))) +- return -EFAULT; +- +-#ifdef CMPXCHG +- asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" +- "setnz %b[r]\n" +- "2:" +- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) +- : [ptr] "+m" (*ptep_user), +- [old] "+a" (orig_pte), +- [r] "=q" (r) +- : [new] "r" (new_pte) +- : "memory"); +-#else +- asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" +- "setnz %b[r]\n" +- "2:" +- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) +- : [ptr] "+m" (*ptep_user), +- [old] "+A" (orig_pte), +- [r] "=q" (r) +- : [new_lo] "b" ((u32)new_pte), +- [new_hi] "c" ((u32)(new_pte >> 32)) +- : "memory"); +-#endif +- +- user_access_end(); +- return r; +-} +- + static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, u64 *spte, + u64 gpte) +@@ -278,7 +242,7 @@ static int FNAME(update_accessed_dirty_b + if (unlikely(!walker->pte_writable[level - 1])) + continue; + +- ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); ++ ret = __try_cmpxchg_user(ptep_user, &orig_pte, pte, fault); + if (ret) + return ret; + diff --git a/queue-5.17/net-ipa-compute-proper-aggregation-limit.patch b/queue-5.17/net-ipa-compute-proper-aggregation-limit.patch new file mode 100644 index 00000000000..e863f1f02e9 --- /dev/null +++ b/queue-5.17/net-ipa-compute-proper-aggregation-limit.patch @@ -0,0 +1,49 @@ +From c5794097b269f15961ed78f7f27b50e51766dec9 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Thu, 21 Apr 2022 13:53:33 -0500 +Subject: net: ipa: compute proper aggregation limit + +From: Alex Elder + +commit c5794097b269f15961ed78f7f27b50e51766dec9 upstream. + +The aggregation byte limit for an endpoint is currently computed +based on the endpoint's receive buffer size. + +However, some bytes at the front of each receive buffer are reserved +on the assumption that--as with SKBs--it might be useful to insert +data (such as headers) before what lands in the buffer. + +The aggregation byte limit currently doesn't take into account that +reserved space, and as a result, aggregation could require space +past that which is available in the buffer. 
+ +Fix this by reducing the size used to compute the aggregation byte +limit by the NET_SKB_PAD offset reserved for each receive buffer. + +Signed-off-by: Alex Elder +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipa/ipa_endpoint.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ipa/ipa_endpoint.c ++++ b/drivers/net/ipa/ipa_endpoint.c +@@ -723,13 +723,15 @@ static void ipa_endpoint_init_aggr(struc + + if (endpoint->data->aggregation) { + if (!endpoint->toward_ipa) { ++ u32 buffer_size; + bool close_eof; + u32 limit; + + val |= u32_encode_bits(IPA_ENABLE_AGGR, AGGR_EN_FMASK); + val |= u32_encode_bits(IPA_GENERIC, AGGR_TYPE_FMASK); + +- limit = ipa_aggr_size_kb(IPA_RX_BUFFER_SIZE); ++ buffer_size = IPA_RX_BUFFER_SIZE - NET_SKB_PAD; ++ limit = ipa_aggr_size_kb(buffer_size); + val |= aggr_byte_limit_encoded(version, limit); + + limit = IPA_AGGR_TIME_LIMIT; diff --git a/queue-5.17/netfilter-conntrack-re-fetch-conntrack-after-insertion.patch b/queue-5.17/netfilter-conntrack-re-fetch-conntrack-after-insertion.patch new file mode 100644 index 00000000000..b634dd75240 --- /dev/null +++ b/queue-5.17/netfilter-conntrack-re-fetch-conntrack-after-insertion.patch @@ -0,0 +1,43 @@ +From 56b14ecec97f39118bf85c9ac2438c5a949509ed Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Fri, 20 May 2022 00:02:04 +0200 +Subject: netfilter: conntrack: re-fetch conntrack after insertion + +From: Florian Westphal + +commit 56b14ecec97f39118bf85c9ac2438c5a949509ed upstream. + +In case the conntrack is clashing, insertion can free skb->_nfct and +set skb->_nfct to the already-confirmed entry. + +This wasn't found before because the conntrack entry and the extension +space used to free'd after an rcu grace period, plus the race needs +events enabled to trigger. + +Reported-by: +Fixes: 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") +Fixes: 2ad9d7747c10 ("netfilter: conntrack: free extension area immediately") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_conntrack_core.h | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/include/net/netfilter/nf_conntrack_core.h ++++ b/include/net/netfilter/nf_conntrack_core.h +@@ -58,8 +58,13 @@ static inline int nf_conntrack_confirm(s + int ret = NF_ACCEPT; + + if (ct) { +- if (!nf_ct_is_confirmed(ct)) ++ if (!nf_ct_is_confirmed(ct)) { + ret = __nf_conntrack_confirm(skb); ++ ++ if (ret == NF_ACCEPT) ++ ct = (struct nf_conn *)skb_nfct(skb); ++ } ++ + if (likely(ret == NF_ACCEPT)) + nf_ct_deliver_cached_events(ct); + } diff --git a/queue-5.17/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch b/queue-5.17/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch new file mode 100644 index 00000000000..07dc611b5f5 --- /dev/null +++ b/queue-5.17/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch @@ -0,0 +1,137 @@ +From f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Mon, 30 May 2022 18:24:06 +0200 +Subject: netfilter: nf_tables: double hook unregistration in netns path + +From: Pablo Neira Ayuso + +commit f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 upstream. + +__nft_release_hooks() is called from pre_netns exit path which +unregisters the hooks, then the NETDEV_UNREGISTER event is triggered +which unregisters the hooks again. 
+ +[ 565.221461] WARNING: CPU: 18 PID: 193 at net/netfilter/core.c:495 __nf_unregister_net_hook+0x247/0x270 +[...] +[ 565.246890] CPU: 18 PID: 193 Comm: kworker/u64:1 Tainted: G E 5.18.0-rc7+ #27 +[ 565.253682] Workqueue: netns cleanup_net +[ 565.257059] RIP: 0010:__nf_unregister_net_hook+0x247/0x270 +[...] +[ 565.297120] Call Trace: +[ 565.300900] +[ 565.304683] nf_tables_flowtable_event+0x16a/0x220 [nf_tables] +[ 565.308518] raw_notifier_call_chain+0x63/0x80 +[ 565.312386] unregister_netdevice_many+0x54f/0xb50 + +Unregister and destroy netdev hook from netns pre_exit via kfree_rcu +so the NETDEV_UNREGISTER path see unregistered hooks. + +Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 54 +++++++++++++++++++++++++++++++----------- + 1 file changed, 41 insertions(+), 13 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -222,12 +222,18 @@ err_register: + } + + static void nft_netdev_unregister_hooks(struct net *net, +- struct list_head *hook_list) ++ struct list_head *hook_list, ++ bool release_netdev) + { +- struct nft_hook *hook; ++ struct nft_hook *hook, *next; + +- list_for_each_entry(hook, hook_list, list) ++ list_for_each_entry_safe(hook, next, hook_list, list) { + nf_unregister_net_hook(net, &hook->ops); ++ if (release_netdev) { ++ list_del(&hook->list); ++ kfree_rcu(hook, rcu); ++ } ++ } + } + + static int nf_tables_register_hook(struct net *net, +@@ -253,9 +259,10 @@ static int nf_tables_register_hook(struc + return nf_register_net_hook(net, &basechain->ops); + } + +-static void nf_tables_unregister_hook(struct net *net, +- const struct nft_table *table, +- struct nft_chain *chain) ++static void __nf_tables_unregister_hook(struct net *net, ++ const struct nft_table *table, ++ struct nft_chain *chain, ++ bool release_netdev) + { + struct nft_base_chain *basechain; + const struct nf_hook_ops *ops; +@@ -270,11 +277,19 @@ static void nf_tables_unregister_hook(st + return basechain->type->ops_unregister(net, ops); + + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) +- nft_netdev_unregister_hooks(net, &basechain->hook_list); ++ nft_netdev_unregister_hooks(net, &basechain->hook_list, ++ release_netdev); + else + nf_unregister_net_hook(net, &basechain->ops); + } + ++static void nf_tables_unregister_hook(struct net *net, ++ const struct nft_table *table, ++ struct nft_chain *chain) ++{ ++ return __nf_tables_unregister_hook(net, table, chain, false); ++} ++ + static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) + { + struct nftables_pernet *nft_net = nft_pernet(net); +@@ -7222,13 +7237,25 @@ static void nft_unregister_flowtable_hoo + FLOW_BLOCK_UNBIND); + } + +-static void nft_unregister_flowtable_net_hooks(struct net *net, +- struct list_head *hook_list) ++static void __nft_unregister_flowtable_net_hooks(struct net *net, ++ struct list_head *hook_list, ++ bool release_netdev) + { +- struct nft_hook *hook; ++ struct nft_hook *hook, *next; + +- list_for_each_entry(hook, hook_list, list) ++ list_for_each_entry_safe(hook, next, hook_list, list) { + nf_unregister_net_hook(net, &hook->ops); ++ if (release_netdev) { ++ list_del(&hook->list); ++ kfree_rcu(hook); ++ } ++ } ++} ++ ++static void nft_unregister_flowtable_net_hooks(struct net *net, ++ struct list_head *hook_list) ++{ ++ __nft_unregister_flowtable_net_hooks(net, hook_list, false); + } + + 
static int nft_register_flowtable_net_hooks(struct net *net, +@@ -9672,9 +9699,10 @@ static void __nft_release_hook(struct ne + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) +- nf_tables_unregister_hook(net, table, chain); ++ __nf_tables_unregister_hook(net, table, chain, true); + list_for_each_entry(flowtable, &table->flowtables, list) +- nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list); ++ __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list, ++ true); + } + + static void __nft_release_hooks(struct net *net) diff --git a/queue-5.17/netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch b/queue-5.17/netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch new file mode 100644 index 00000000000..3a76450d359 --- /dev/null +++ b/queue-5.17/netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch @@ -0,0 +1,32 @@ +From 3923b1e4406680d57da7e873da77b1683035d83f Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Mon, 30 May 2022 18:24:05 +0200 +Subject: netfilter: nf_tables: hold mutex on netns pre_exit path + +From: Pablo Neira Ayuso + +commit 3923b1e4406680d57da7e873da77b1683035d83f upstream. + +clean_net() runs in workqueue while walking over the lists, grab mutex. + +Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9813,7 +9813,11 @@ static int __net_init nf_tables_init_net + + static void __net_exit nf_tables_pre_exit_net(struct net *net) + { ++ struct nftables_pernet *nft_net = nft_pernet(net); ++ ++ mutex_lock(&nft_net->commit_mutex); + __nft_release_hooks(net); ++ mutex_unlock(&nft_net->commit_mutex); + } + + static void __net_exit nf_tables_exit_net(struct net *net) diff --git a/queue-5.17/netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch b/queue-5.17/netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch new file mode 100644 index 00000000000..922038f8598 --- /dev/null +++ b/queue-5.17/netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch @@ -0,0 +1,74 @@ +From fecf31ee395b0295f2d7260aa29946b7605f7c85 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Fri, 27 May 2022 09:56:18 +0200 +Subject: netfilter: nf_tables: sanitize nft_set_desc_concat_parse() + +From: Pablo Neira Ayuso + +commit fecf31ee395b0295f2d7260aa29946b7605f7c85 upstream. + +Add several sanity checks for nft_set_desc_concat_parse(): + +- validate desc->field_count not larger than desc->field_len array. +- field length cannot be larger than desc->field_len (ie. U8_MAX) +- total length of the concatenation cannot be larger than register array. + +Joint work with Florian Westphal. 
+ +Fixes: f3a2181e16f1 ("netfilter: nf_tables: Support for sets with multiple ranged fields") +Reported-by: +Reviewed-by: Stefano Brivio +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4167,6 +4167,9 @@ static int nft_set_desc_concat_parse(con + u32 len; + int err; + ++ if (desc->field_count >= ARRAY_SIZE(desc->field_len)) ++ return -E2BIG; ++ + err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr, + nft_concat_policy, NULL); + if (err < 0) +@@ -4176,9 +4179,8 @@ static int nft_set_desc_concat_parse(con + return -EINVAL; + + len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN])); +- +- if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT) +- return -E2BIG; ++ if (!len || len > U8_MAX) ++ return -EINVAL; + + desc->field_len[desc->field_count++] = len; + +@@ -4189,7 +4191,8 @@ static int nft_set_desc_concat(struct nf + const struct nlattr *nla) + { + struct nlattr *attr; +- int rem, err; ++ u32 num_regs = 0; ++ int rem, err, i; + + nla_for_each_nested(attr, nla, rem) { + if (nla_type(attr) != NFTA_LIST_ELEM) +@@ -4200,6 +4203,12 @@ static int nft_set_desc_concat(struct nf + return err; + } + ++ for (i = 0; i < desc->field_count; i++) ++ num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); ++ ++ if (num_regs > NFT_REG32_COUNT) ++ return -E2BIG; ++ + return 0; + } + diff --git a/queue-5.17/netfilter-nft_limit-clone-packet-limits-cost-value.patch b/queue-5.17/netfilter-nft_limit-clone-packet-limits-cost-value.patch new file mode 100644 index 00000000000..9905e5c4eb4 --- /dev/null +++ b/queue-5.17/netfilter-nft_limit-clone-packet-limits-cost-value.patch @@ -0,0 +1,31 @@ +From 558254b0b602b8605d7246a10cfeb584b1fcabfc Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Tue, 24 May 2022 14:50:01 +0200 +Subject: netfilter: nft_limit: Clone packet limits' cost value + +From: Phil Sutter + +commit 558254b0b602b8605d7246a10cfeb584b1fcabfc upstream. + +When cloning a packet-based limit expression, copy the cost value as +well. Otherwise the new limit is not functional anymore. 
+ +Fixes: 3b9e2ea6c11bf ("netfilter: nft_limit: move stateful fields out of expression data") +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_limit.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/netfilter/nft_limit.c ++++ b/net/netfilter/nft_limit.c +@@ -213,6 +213,8 @@ static int nft_limit_pkts_clone(struct n + struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst); + struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src); + ++ priv_dst->cost = priv_src->cost; ++ + return nft_limit_clone(&priv_dst->limit, &priv_src->limit); + } + diff --git a/queue-5.17/series b/queue-5.17/series index 075c0e0cbd5..149793ac5d7 100644 --- a/queue-5.17/series +++ b/queue-5.17/series @@ -14,3 +14,26 @@ i2c-ismt-prevent-memory-corruption-in-ismt_access.patch assoc_array-fix-bug_on-during-garbage-collect.patch pipe-make-poll_usage-boolean-and-annotate-its-access.patch pipe-fix-missing-lock-in-pipe_resize_ring.patch +net-ipa-compute-proper-aggregation-limit.patch +drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch +exfat-check-if-cluster-num-is-valid.patch +exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch +netfilter-nft_limit-clone-packet-limits-cost-value.patch +netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch +netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch +netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch +netfilter-conntrack-re-fetch-conntrack-after-insertion.patch +kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch +x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch +x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch +x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch +x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch +kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch +kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch +kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch +kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch +kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch +kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch +kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch +kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch +kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch diff --git a/queue-5.17/x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch b/queue-5.17/x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch new file mode 100644 index 00000000000..f71d4648710 --- /dev/null +++ b/queue-5.17/x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch @@ -0,0 +1,113 @@ +From d187ba5312307d51818beafaad87d28a7d939adf Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 4 May 2022 00:12:19 +0000 +Subject: x86/fpu: KVM: Set the base guest FPU uABI size to sizeof(struct kvm_xsave) + +From: Sean Christopherson + +commit d187ba5312307d51818beafaad87d28a7d939adf upstream. + +Set the starting uABI size of KVM's guest FPU to 'struct kvm_xsave', +i.e. to KVM's historical uABI size. When saving FPU state for usersapce, +KVM (well, now the FPU) sets the FP+SSE bits in the XSAVE header even if +the host doesn't support XSAVE. 
Setting the XSAVE header allows the VM +to be migrated to a host that does support XSAVE without the new host +having to handle FPU state that may or may not be compatible with XSAVE. + +Setting the uABI size to the host's default size results in out-of-bounds +writes (setting the FP+SSE bits) and data corruption (that is thankfully +caught by KASAN) when running on hosts without XSAVE, e.g. on Core2 CPUs. + +WARN if the default size is larger than KVM's historical uABI size; all +features that can push the FPU size beyond the historical size must be +opt-in. + + ================================================================== + BUG: KASAN: slab-out-of-bounds in fpu_copy_uabi_to_guest_fpstate+0x86/0x130 + Read of size 8 at addr ffff888011e33a00 by task qemu-build/681 + CPU: 1 PID: 681 Comm: qemu-build Not tainted 5.18.0-rc5-KASAN-amd64 #1 + Hardware name: /DG35EC, BIOS ECG3510M.86A.0118.2010.0113.1426 01/13/2010 + Call Trace: + + dump_stack_lvl+0x34/0x45 + print_report.cold+0x45/0x575 + kasan_report+0x9b/0xd0 + fpu_copy_uabi_to_guest_fpstate+0x86/0x130 + kvm_arch_vcpu_ioctl+0x72a/0x1c50 [kvm] + kvm_vcpu_ioctl+0x47f/0x7b0 [kvm] + __x64_sys_ioctl+0x5de/0xc90 + do_syscall_64+0x31/0x50 + entry_SYSCALL_64_after_hwframe+0x44/0xae + + Allocated by task 0: + (stack is not available) + The buggy address belongs to the object at ffff888011e33800 + which belongs to the cache kmalloc-512 of size 512 + The buggy address is located 0 bytes to the right of + 512-byte region [ffff888011e33800, ffff888011e33a00) + The buggy address belongs to the physical page: + page:0000000089cd4adb refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11e30 + head:0000000089cd4adb order:2 compound_mapcount:0 compound_pincount:0 + flags: 0x4000000000010200(slab|head|zone=1) + raw: 4000000000010200 dead000000000100 dead000000000122 ffff888001041c80 + raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 + page dumped because: kasan: bad access detected + Memory state around the buggy address: + ffff888011e33900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffff888011e33980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + >ffff888011e33a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ^ + ffff888011e33a80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff888011e33b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ================================================================== + Disabling lock debugging due to kernel taint + +Fixes: be50b2065dfa ("kvm: x86: Add support for getting/setting expanded xstate buffer") +Fixes: c60427dd50ba ("x86/fpu: Add uabi_size to guest_fpu") +Reported-by: Zdenek Kaspar +Cc: Maciej S. 
Szmigiero +Cc: Paolo Bonzini +Cc: kvm@vger.kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Tested-by: Zdenek Kaspar +Message-Id: <20220504001219.983513-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/core.c | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -14,6 +14,8 @@ + #include + #include + ++#include ++ + #include + #include + #include +@@ -232,7 +234,20 @@ bool fpu_alloc_guest_fpstate(struct fpu_ + gfpu->fpstate = fpstate; + gfpu->xfeatures = fpu_user_cfg.default_features; + gfpu->perm = fpu_user_cfg.default_features; +- gfpu->uabi_size = fpu_user_cfg.default_size; ++ ++ /* ++ * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state ++ * to userspace, even when XSAVE is unsupported, so that restoring FPU ++ * state on a different CPU that does support XSAVE can cleanly load ++ * the incoming state using its natural XSAVE. In other words, KVM's ++ * uABI size may be larger than this host's default size. Conversely, ++ * the default size should never be larger than KVM's base uABI size; ++ * all features that can expand the uABI size must be opt-in. ++ */ ++ gfpu->uabi_size = sizeof(struct kvm_xsave); ++ if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size)) ++ gfpu->uabi_size = fpu_user_cfg.default_size; ++ + fpu_init_guest_permissions(gfpu); + + return true; diff --git a/queue-5.17/x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch b/queue-5.17/x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch new file mode 100644 index 00000000000..44d39c4ebd1 --- /dev/null +++ b/queue-5.17/x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch @@ -0,0 +1,91 @@ +From 0547758a6de3cc71a0cfdd031a3621a30db6a68b Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 19 May 2022 07:57:11 -0700 +Subject: x86/kvm: Alloc dummy async #PF token outside of raw spinlock + +From: Sean Christopherson + +commit 0547758a6de3cc71a0cfdd031a3621a30db6a68b upstream. + +Drop the raw spinlock in kvm_async_pf_task_wake() before allocating the +the dummy async #PF token, the allocator is preemptible on PREEMPT_RT +kernels and must not be called from truly atomic contexts. + +Opportunistically document why it's ok to loop on allocation failure, +i.e. why the function won't get stuck in an infinite loop. + +Reported-by: Yajun Deng +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kvm.c | 41 +++++++++++++++++++++++++++-------------- + 1 file changed, 27 insertions(+), 14 deletions(-) + +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -191,7 +191,7 @@ void kvm_async_pf_task_wake(u32 token) + { + u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); + struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; +- struct kvm_task_sleep_node *n; ++ struct kvm_task_sleep_node *n, *dummy = NULL; + + if (token == ~0) { + apf_task_wake_all(); +@@ -203,28 +203,41 @@ again: + n = _find_apf_task(b, token); + if (!n) { + /* +- * async PF was not yet handled. +- * Add dummy entry for the token. ++ * Async #PF not yet handled, add a dummy entry for the token. ++ * Allocating the token must be down outside of the raw lock ++ * as the allocator is preemptible on PREEMPT_RT kernels. 
+ */ +- n = kzalloc(sizeof(*n), GFP_ATOMIC); +- if (!n) { ++ if (!dummy) { ++ raw_spin_unlock(&b->lock); ++ dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); ++ + /* +- * Allocation failed! Busy wait while other cpu +- * handles async PF. ++ * Continue looping on allocation failure, eventually ++ * the async #PF will be handled and allocating a new ++ * node will be unnecessary. ++ */ ++ if (!dummy) ++ cpu_relax(); ++ ++ /* ++ * Recheck for async #PF completion before enqueueing ++ * the dummy token to avoid duplicate list entries. + */ +- raw_spin_unlock(&b->lock); +- cpu_relax(); + goto again; + } +- n->token = token; +- n->cpu = smp_processor_id(); +- init_swait_queue_head(&n->wq); +- hlist_add_head(&n->link, &b->list); ++ dummy->token = token; ++ dummy->cpu = smp_processor_id(); ++ init_swait_queue_head(&dummy->wq); ++ hlist_add_head(&dummy->link, &b->list); ++ dummy = NULL; + } else { + apf_task_wake_one(n); + } + raw_spin_unlock(&b->lock); +- return; ++ ++ /* A dummy token might be allocated and ultimately not used. */ ++ if (dummy) ++ kfree(dummy); + } + EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); + diff --git a/queue-5.17/x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch b/queue-5.17/x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch new file mode 100644 index 00000000000..8ace75d0694 --- /dev/null +++ b/queue-5.17/x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch @@ -0,0 +1,81 @@ +From baec4f5a018fe2d708fc1022330dba04b38b5fe3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 24 May 2022 09:43:31 -0400 +Subject: x86, kvm: use correct GFP flags for preemption disabled + +From: Paolo Bonzini + +commit baec4f5a018fe2d708fc1022330dba04b38b5fe3 upstream. + +Commit ddd7ed842627 ("x86/kvm: Alloc dummy async #PF token outside of +raw spinlock") leads to the following Smatch static checker warning: + + arch/x86/kernel/kvm.c:212 kvm_async_pf_task_wake() + warn: sleeping in atomic context + +arch/x86/kernel/kvm.c + 202 raw_spin_lock(&b->lock); + 203 n = _find_apf_task(b, token); + 204 if (!n) { + 205 /* + 206 * Async #PF not yet handled, add a dummy entry for the token. + 207 * Allocating the token must be down outside of the raw lock + 208 * as the allocator is preemptible on PREEMPT_RT kernels. + 209 */ + 210 if (!dummy) { + 211 raw_spin_unlock(&b->lock); +--> 212 dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); + ^^^^^^^^^^ +Smatch thinks the caller has preempt disabled. The `smdb.py preempt +kvm_async_pf_task_wake` output call tree is: + +sysvec_kvm_asyncpf_interrupt() <- disables preempt +-> __sysvec_kvm_asyncpf_interrupt() + -> kvm_async_pf_task_wake() + +The caller is this: + +arch/x86/kernel/kvm.c + 290 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) + 291 { + 292 struct pt_regs *old_regs = set_irq_regs(regs); + 293 u32 token; + 294 + 295 ack_APIC_irq(); + 296 + 297 inc_irq_stat(irq_hv_callback_count); + 298 + 299 if (__this_cpu_read(apf_reason.enabled)) { + 300 token = __this_cpu_read(apf_reason.token); + 301 kvm_async_pf_task_wake(token); + 302 __this_cpu_write(apf_reason.token, 0); + 303 wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1); + 304 } + 305 + 306 set_irq_regs(old_regs); + 307 } + +The DEFINE_IDTENTRY_SYSVEC() is a wrapper that calls this function +from the call_on_irqstack_cond(). It's inside the call_on_irqstack_cond() +where preempt is disabled (unless it's already disabled). The +irq_enter/exit_rcu() functions disable/enable preempt. 
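The underlying rule, in a rough sketch (not code from this patch; 'p' and 'size' are placeholders): the only question is whether the allocation may sleep in the caller's context.

          preempt_disable();
          p = kzalloc(size, GFP_KERNEL);  /* invalid here: GFP_KERNEL may sleep */
          p = kzalloc(size, GFP_ATOMIC);  /* fine: GFP_ATOMIC never sleeps */
          preempt_enable();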
+ +Reported-by: Dan Carpenter +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kvm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -209,7 +209,7 @@ again: + */ + if (!dummy) { + raw_spin_unlock(&b->lock); +- dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); ++ dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC); + + /* + * Continue looping on allocation failure, eventually diff --git a/queue-5.17/x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch b/queue-5.17/x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch new file mode 100644 index 00000000000..b0bb6f1e256 --- /dev/null +++ b/queue-5.17/x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch @@ -0,0 +1,191 @@ +From 989b5db215a2f22f89d730b607b071d964780f10 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Wed, 2 Feb 2022 00:49:42 +0000 +Subject: x86/uaccess: Implement macros for CMPXCHG on user addresses + +From: Peter Zijlstra + +commit 989b5db215a2f22f89d730b607b071d964780f10 upstream. + +Add support for CMPXCHG loops on userspace addresses. Provide both an +"unsafe" version for tight loops that do their own uaccess begin/end, as +well as a "safe" version for use cases where the CMPXCHG is not buried in +a loop, e.g. KVM will resume the guest instead of looping when emulation +of a guest atomic accesses fails the CMPXCHG. + +Provide 8-byte versions for 32-bit kernels so that KVM can do CMPXCHG on +guest PAE PTEs, which are accessed via userspace addresses. + +Guard the asm_volatile_goto() variation with CC_HAS_ASM_GOTO_TIED_OUTPUT, +the "+m" constraint fails on some compilers that otherwise support +CC_HAS_ASM_GOTO_OUTPUT. + +Cc: stable@vger.kernel.org +Signed-off-by: Peter Zijlstra (Intel) +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Message-Id: <20220202004945.2540433-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/uaccess.h | 142 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 142 insertions(+) + +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -409,6 +409,103 @@ do { \ + + #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + ++#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT ++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ ++ bool success; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm_volatile_goto("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ ++ _ASM_EXTABLE_UA(1b, %l[label]) \ ++ : CC_OUT(z) (success), \ ++ [ptr] "+m" (*_ptr), \ ++ [old] "+a" (__old) \ ++ : [new] ltype (__new) \ ++ : "memory" \ ++ : label); \ ++ if (unlikely(!success)) \ ++ *_old = __old; \ ++ likely(success); }) ++ ++#ifdef CONFIG_X86_32 ++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ ++ bool success; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm_volatile_goto("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ ++ _ASM_EXTABLE_UA(1b, %l[label]) \ ++ : CC_OUT(z) (success), \ ++ "+A" (__old), \ ++ [ptr] "+m" (*_ptr) \ ++ : "b" ((u32)__new), \ ++ "c" ((u32)((u64)__new >> 32)) \ ++ : "memory" \ ++ : label); \ ++ if (unlikely(!success)) \ ++ *_old = __old; \ ++ likely(success); }) ++#endif // CONFIG_X86_32 ++#else // 
!CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT ++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ ++ int __err = 0; \ ++ bool success; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm volatile("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ ++ CC_SET(z) \ ++ "2:\n" \ ++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ ++ %[errout]) \ ++ : CC_OUT(z) (success), \ ++ [errout] "+r" (__err), \ ++ [ptr] "+m" (*_ptr), \ ++ [old] "+a" (__old) \ ++ : [new] ltype (__new) \ ++ : "memory", "cc"); \ ++ if (unlikely(__err)) \ ++ goto label; \ ++ if (unlikely(!success)) \ ++ *_old = __old; \ ++ likely(success); }) ++ ++#ifdef CONFIG_X86_32 ++/* ++ * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error. ++ * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are ++ * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses ++ * both ESI and EDI for the memory operand, compilation will fail if the error ++ * is an input+output as there will be no register available for input. ++ */ ++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ ++ int __result; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm volatile("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ ++ "mov $0, %%ecx\n\t" \ ++ "setz %%cl\n" \ ++ "2:\n" \ ++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \ ++ : [result]"=c" (__result), \ ++ "+A" (__old), \ ++ [ptr] "+m" (*_ptr) \ ++ : "b" ((u32)__new), \ ++ "c" ((u32)((u64)__new >> 32)) \ ++ : "memory", "cc"); \ ++ if (unlikely(__result < 0)) \ ++ goto label; \ ++ if (unlikely(!__result)) \ ++ *_old = __old; \ ++ likely(__result); }) ++#endif // CONFIG_X86_32 ++#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT ++ + /* FIXME: this hack is definitely wrong -AK */ + struct __large_struct { unsigned long buf[100]; }; + #define __m(x) (*(struct __large_struct __user *)(x)) +@@ -501,6 +598,51 @@ do { \ + } while (0) + #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + ++extern void __try_cmpxchg_user_wrong_size(void); ++ ++#ifndef CONFIG_X86_32 ++#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ ++ __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) ++#endif ++ ++/* ++ * Force the pointer to u to match the size expected by the asm helper. ++ * clang/LLVM compiles all cases and only discards the unused paths after ++ * processing errors, which breaks i386 if the pointer is an 8-byte value. ++ */ ++#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ ++ bool __ret; \ ++ __chk_user_ptr(_ptr); \ ++ switch (sizeof(*(_ptr))) { \ ++ case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ ++ (__force u8 *)(_ptr), (_oldp), \ ++ (_nval), _label); \ ++ break; \ ++ case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ ++ (__force u16 *)(_ptr), (_oldp), \ ++ (_nval), _label); \ ++ break; \ ++ case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ ++ (__force u32 *)(_ptr), (_oldp), \ ++ (_nval), _label); \ ++ break; \ ++ case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ ++ (_nval), _label); \ ++ break; \ ++ default: __try_cmpxchg_user_wrong_size(); \ ++ } \ ++ __ret; }) ++ ++/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. 
*/ ++#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ ++ int __ret = -EFAULT; \ ++ __uaccess_begin_nospec(); \ ++ __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ ++_label: \ ++ __uaccess_end(); \ ++ __ret; \ ++ }) ++ + /* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. -- 2.47.3
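As a usage sketch for the new helpers (the function name, the OR operation and the 'fault' label are illustrative, not taken from this series; assumes a kernel context with the user pointer already range-checked): a tight loop around the "unsafe" variant, with the caller doing its own user_access_begin()/user_access_end() and supplying the fault label, as the changelog describes.

  static int user_or_u32(u32 __user *uptr, u32 bits)
  {
          u32 old;

          if (get_user(old, uptr))
                  return -EFAULT;
          if (!user_access_begin(uptr, sizeof(*uptr)))
                  return -EFAULT;
          /* On a compare failure the helper refreshes 'old', so just retry. */
          while (!unsafe_try_cmpxchg_user(uptr, &old, old | bits, fault))
                  ;
          user_access_end();
          return 0;
  fault:
          user_access_end();
          return -EFAULT;
  }

The __try_cmpxchg_user() wrapper instead handles the begin/end and the label internally and returns 0 on success, 1 on a failed compare and -EFAULT on a fault, per the comment above it.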