From: Greg Kroah-Hartman
Date: Thu, 13 Jun 2024 07:21:27 +0000 (+0200)
Subject: 6.1-stable patches
X-Git-Tag: v4.19.316~76
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4fdd0fa2c21873779eb68631e50ce7a052d33911;p=thirdparty%2Fkernel%2Fstable-queue.git

6.1-stable patches

added patches:
9p-add-missing-locking-around-taking-dentry-fid-list.patch
crypto-ecdsa-fix-module-auto-load-on-add-key.patch
crypto-ecrdsa-fix-module-auto-load-on-add_key.patch
crypto-qat-fix-adf_dev_reset_sync-memory-leak.patch
drm-amd-fix-shutdown-again-on-some-smu-v13.0.4-11-platforms.patch
kvm-arm64-aarch32-fix-spurious-trapping-of-conditional-instructions.patch
kvm-arm64-allow-aarch32-pstate.m-to-be-restored-as-system-mode.patch
kvm-arm64-fix-aarch32-register-narrowing-on-userspace-write.patch
mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch
---
diff --git a/queue-6.1/9p-add-missing-locking-around-taking-dentry-fid-list.patch b/queue-6.1/9p-add-missing-locking-around-taking-dentry-fid-list.patch
new file mode 100644
index 00000000000..cfe3663fff5
--- /dev/null
+++ b/queue-6.1/9p-add-missing-locking-around-taking-dentry-fid-list.patch
@@ -0,0 +1,70 @@
+From c898afdc15645efb555acb6d85b484eb40a45409 Mon Sep 17 00:00:00 2001
+From: Dominique Martinet
+Date: Tue, 21 May 2024 21:13:36 +0900
+Subject: 9p: add missing locking around taking dentry fid list
+
+From: Dominique Martinet
+
+commit c898afdc15645efb555acb6d85b484eb40a45409 upstream.
+
+Fix a use-after-free on dentry's d_fsdata fid list when a thread
+looks up a fid through dentry while another thread unlinks it:
+
+UAF thread:
+refcount_t: addition on 0; use-after-free.
+ p9_fid_get linux/./include/net/9p/client.h:262
+ v9fs_fid_find+0x236/0x280 linux/fs/9p/fid.c:129
+ v9fs_fid_lookup_with_uid linux/fs/9p/fid.c:181
+ v9fs_fid_lookup+0xbf/0xc20 linux/fs/9p/fid.c:314
+ v9fs_vfs_getattr_dotl+0xf9/0x360 linux/fs/9p/vfs_inode_dotl.c:400
+ vfs_statx+0xdd/0x4d0 linux/fs/stat.c:248
+
+Freed by:
+ p9_fid_destroy (inlined)
+ p9_client_clunk+0xb0/0xe0 linux/net/9p/client.c:1456
+ p9_fid_put linux/./include/net/9p/client.h:278
+ v9fs_dentry_release+0xb5/0x140 linux/fs/9p/vfs_dentry.c:55
+ v9fs_remove+0x38f/0x620 linux/fs/9p/vfs_inode.c:518
+ vfs_unlink+0x29a/0x810 linux/fs/namei.c:4335
+
+The problem is that d_fsdata was not accessed under d_lock.
+d_release() is normally only called once the dentry is otherwise no
+longer accessible, but since we also call it explicitly in v9fs_remove,
+that lock is required: move the hlist out of the dentry under lock,
+then unref its fids once they are no longer accessible.
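+
+A minimal sketch of the pattern, with a hypothetical obj type standing
+in for the dentry (the actual change is in the hunk below):
+
+	struct obj { spinlock_t lock; struct hlist_head fids; };
+
+	static void obj_release(struct obj *o)
+	{
+		struct hlist_node *p, *n;
+		struct hlist_head head;
+
+		/* Detach the whole list while holding the lock... */
+		spin_lock(&o->lock);
+		hlist_move_list(&o->fids, &head);
+		spin_unlock(&o->lock);
+
+		/* ...then drop the references without the lock held. */
+		hlist_for_each_safe(p, n, &head)
+			p9_fid_put(hlist_entry(p, struct p9_fid, dlist));
+	}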
+ +Fixes: 154372e67d40 ("fs/9p: fix create-unlink-getattr idiom") +Cc: stable@vger.kernel.org +Reported-by: Meysam Firouzi +Reported-by: Amirmohammad Eftekhar +Reviewed-by: Christian Schoenebeck +Message-ID: <20240521122947.1080227-1-asmadeus@codewreck.org> +Signed-off-by: Dominique Martinet +Signed-off-by: Greg Kroah-Hartman +--- + fs/9p/vfs_dentry.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/fs/9p/vfs_dentry.c ++++ b/fs/9p/vfs_dentry.c +@@ -50,12 +50,17 @@ static int v9fs_cached_dentry_delete(con + static void v9fs_dentry_release(struct dentry *dentry) + { + struct hlist_node *p, *n; ++ struct hlist_head head; + + p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n", + dentry, dentry); +- hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata) ++ ++ spin_lock(&dentry->d_lock); ++ hlist_move_list((struct hlist_head *)&dentry->d_fsdata, &head); ++ spin_unlock(&dentry->d_lock); ++ ++ hlist_for_each_safe(p, n, &head) + p9_fid_put(hlist_entry(p, struct p9_fid, dlist)); +- dentry->d_fsdata = NULL; + } + + static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) diff --git a/queue-6.1/crypto-ecdsa-fix-module-auto-load-on-add-key.patch b/queue-6.1/crypto-ecdsa-fix-module-auto-load-on-add-key.patch new file mode 100644 index 00000000000..557a7d797c5 --- /dev/null +++ b/queue-6.1/crypto-ecdsa-fix-module-auto-load-on-add-key.patch @@ -0,0 +1,56 @@ +From 48e4fd6d54f54d0ceab5a952d73e47a9454a6ccb Mon Sep 17 00:00:00 2001 +From: Stefan Berger +Date: Thu, 21 Mar 2024 10:44:33 -0400 +Subject: crypto: ecdsa - Fix module auto-load on add-key + +From: Stefan Berger + +commit 48e4fd6d54f54d0ceab5a952d73e47a9454a6ccb upstream. + +Add module alias with the algorithm cra_name similar to what we have for +RSA-related and other algorithms. + +The kernel attempts to modprobe asymmetric algorithms using the names +"crypto-$cra_name" and "crypto-$cra_name-all." However, since these +aliases are currently missing, the modules are not loaded. For instance, +when using the `add_key` function, the hash algorithm is typically +loaded automatically, but the asymmetric algorithm is not. + +Steps to test: + +1. Create certificate + + openssl req -x509 -sha256 -newkey ec \ + -pkeyopt "ec_paramgen_curve:secp384r1" -keyout key.pem -days 365 \ + -subj '/CN=test' -nodes -outform der -out nist-p384.der + +2. Optionally, trace module requests with: trace-cmd stream -e module & + +3. 
Trigger add_key call for the cert:
+
+   # keyctl padd asymmetric "" @u < nist-p384.der
+   641069229
+   # lsmod | head -2
+   Module                  Size  Used by
+   ecdsa_generic          16384  0
+
+Fixes: c12d448ba939 ("crypto: ecdsa - Register NIST P384 and extend test suite")
+Cc: stable@vger.kernel.org
+Signed-off-by: Stefan Berger
+Reviewed-by: Vitaly Chikunov
+Signed-off-by: Herbert Xu
+Signed-off-by: Greg Kroah-Hartman
+---
+ crypto/ecdsa.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/crypto/ecdsa.c
++++ b/crypto/ecdsa.c
+@@ -373,4 +373,7 @@ module_exit(ecdsa_exit);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Stefan Berger ");
+ MODULE_DESCRIPTION("ECDSA generic algorithm");
++MODULE_ALIAS_CRYPTO("ecdsa-nist-p192");
++MODULE_ALIAS_CRYPTO("ecdsa-nist-p256");
++MODULE_ALIAS_CRYPTO("ecdsa-nist-p384");
+ MODULE_ALIAS_CRYPTO("ecdsa-generic");
diff --git a/queue-6.1/crypto-ecrdsa-fix-module-auto-load-on-add_key.patch b/queue-6.1/crypto-ecrdsa-fix-module-auto-load-on-add_key.patch
new file mode 100644
index 00000000000..9a66d175088
--- /dev/null
+++ b/queue-6.1/crypto-ecrdsa-fix-module-auto-load-on-add_key.patch
@@ -0,0 +1,64 @@
+From eb5739a1efbc9ff216271aeea0ebe1c92e5383e5 Mon Sep 17 00:00:00 2001
+From: Vitaly Chikunov
+Date: Mon, 18 Mar 2024 03:42:40 +0300
+Subject: crypto: ecrdsa - Fix module auto-load on add_key
+
+From: Vitaly Chikunov
+
+commit eb5739a1efbc9ff216271aeea0ebe1c92e5383e5 upstream.
+
+Add module alias with the algorithm cra_name similar to what we have for
+RSA-related and other algorithms.
+
+The kernel attempts to modprobe asymmetric algorithms using the names
+"crypto-$cra_name" and "crypto-$cra_name-all." However, since these
+aliases are currently missing, the modules are not loaded. For instance,
+when using the `add_key` function, the hash algorithm is typically
+loaded automatically, but the asymmetric algorithm is not.
+
+Steps to test:
+
+1. Cert is generated using the ima-evm-utils test suite with
+   `gen-keys.sh`; an example cert is provided below:
+
+   $ base64 -d >test-gost2012_512-A.cer <
+Cc: stable@vger.kernel.org
+Signed-off-by: Vitaly Chikunov
+Tested-by: Stefan Berger
+Signed-off-by: Herbert Xu
+Signed-off-by: Greg Kroah-Hartman
+---
+ crypto/ecrdsa.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/crypto/ecrdsa.c
++++ b/crypto/ecrdsa.c
+@@ -294,4 +294,5 @@ module_exit(ecrdsa_mod_fini);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Vitaly Chikunov ");
+ MODULE_DESCRIPTION("EC-RDSA generic algorithm");
++MODULE_ALIAS_CRYPTO("ecrdsa");
+ MODULE_ALIAS_CRYPTO("ecrdsa-generic");
diff --git a/queue-6.1/crypto-qat-fix-adf_dev_reset_sync-memory-leak.patch b/queue-6.1/crypto-qat-fix-adf_dev_reset_sync-memory-leak.patch
new file mode 100644
index 00000000000..c5abf26fc5c
--- /dev/null
+++ b/queue-6.1/crypto-qat-fix-adf_dev_reset_sync-memory-leak.patch
@@ -0,0 +1,71 @@
+From d3b17c6d9dddc2db3670bc9be628b122416a3d26 Mon Sep 17 00:00:00 2001
+From: Herbert Xu
+Date: Wed, 8 May 2024 16:39:51 +0800
+Subject: crypto: qat - Fix ADF_DEV_RESET_SYNC memory leak
+
+From: Herbert Xu
+
+commit d3b17c6d9dddc2db3670bc9be628b122416a3d26 upstream.
+
+Using completion_done to determine whether the caller has gone
+away only works after a complete call. Furthermore, it's still
+possible that the caller has not yet called wait_for_completion,
+resulting in another potential UAF.
+
+Fix this by making the caller use cancel_work_sync and then freeing
+the memory safely.
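+
+The resulting ownership rule, reduced to a sketch (names shortened;
+see the hunks below for the actual code):
+
+	static void reset_worker(struct work_struct *work)
+	{
+		struct adf_reset_dev_data *rd =
+			container_of(work, struct adf_reset_dev_data, reset_work);
+
+		/* ... perform the reset ... */
+		if (rd->mode == ADF_DEV_RESET_ASYNC)
+			kfree(rd);		/* async: the worker owns rd */
+		else
+			complete(&rd->compl);	/* sync: the caller still owns rd */
+	}
+
+	/* sync caller */
+	if (!wait_for_completion_timeout(&rd->compl, wait_jiffies))
+		cancel_work_sync(&rd->reset_work);	/* worker can no longer touch rd */
+	kfree(rd);					/* now always safe to free */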
+ +Fixes: 7d42e097607c ("crypto: qat - resolve race condition during AER recovery") +Cc: #6.8+ +Signed-off-by: Herbert Xu +Reviewed-by: Giovanni Cabiddu +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/qat/qat_common/adf_aer.c | 19 +++++-------------- + 1 file changed, 5 insertions(+), 14 deletions(-) + +--- a/drivers/crypto/qat/qat_common/adf_aer.c ++++ b/drivers/crypto/qat/qat_common/adf_aer.c +@@ -95,8 +95,7 @@ static void adf_device_reset_worker(stru + if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) { + /* The device hanged and we can't restart it so stop here */ + dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); +- if (reset_data->mode == ADF_DEV_RESET_ASYNC || +- completion_done(&reset_data->compl)) ++ if (reset_data->mode == ADF_DEV_RESET_ASYNC) + kfree(reset_data); + WARN(1, "QAT: device restart failed. Device is unusable\n"); + return; +@@ -104,16 +103,8 @@ static void adf_device_reset_worker(stru + adf_dev_restarted_notify(accel_dev); + clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); + +- /* +- * The dev is back alive. Notify the caller if in sync mode +- * +- * If device restart will take a more time than expected, +- * the schedule_reset() function can timeout and exit. This can be +- * detected by calling the completion_done() function. In this case +- * the reset_data structure needs to be freed here. +- */ +- if (reset_data->mode == ADF_DEV_RESET_ASYNC || +- completion_done(&reset_data->compl)) ++ /* The dev is back alive. Notify the caller if in sync mode */ ++ if (reset_data->mode == ADF_DEV_RESET_ASYNC) + kfree(reset_data); + else + complete(&reset_data->compl); +@@ -148,10 +139,10 @@ static int adf_dev_aer_schedule_reset(st + if (!timeout) { + dev_err(&GET_DEV(accel_dev), + "Reset device timeout expired\n"); ++ cancel_work_sync(&reset_data->reset_work); + ret = -EFAULT; +- } else { +- kfree(reset_data); + } ++ kfree(reset_data); + return ret; + } + return 0; diff --git a/queue-6.1/drm-amd-fix-shutdown-again-on-some-smu-v13.0.4-11-platforms.patch b/queue-6.1/drm-amd-fix-shutdown-again-on-some-smu-v13.0.4-11-platforms.patch new file mode 100644 index 00000000000..de68698aae9 --- /dev/null +++ b/queue-6.1/drm-amd-fix-shutdown-again-on-some-smu-v13.0.4-11-platforms.patch @@ -0,0 +1,61 @@ +From 267cace556e8a53d703119f7435ab556209e5b6a Mon Sep 17 00:00:00 2001 +From: Mario Limonciello +Date: Sun, 26 May 2024 07:59:08 -0500 +Subject: drm/amd: Fix shutdown (again) on some SMU v13.0.4/11 platforms + +From: Mario Limonciello + +commit 267cace556e8a53d703119f7435ab556209e5b6a upstream. + +commit cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for +some SMU 13.0.4/13.0.11 users") attempted to fix shutdown issues +that were reported since commit 31729e8c21ec ("drm/amd/pm: fixes a +random hang in S4 for SMU v13.0.4/11") but caused issues for some +people. 
+ +Adjust the workaround flow to properly only apply in the S4 case: +-> For shutdown go through SMU_MSG_PrepareMp1ForUnload +-> For S4 go through SMU_MSG_GfxDeviceDriverReset and + SMU_MSG_PrepareMp1ForUnload + +Reported-and-tested-by: lectrode +Closes: https://github.com/void-linux/void-packages/issues/50417 +Cc: stable@vger.kernel.org +Fixes: cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for some SMU 13.0.4/13.0.11 users") +Reviewed-by: Tim Huang +Signed-off-by: Mario Limonciello +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 20 ++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +@@ -222,15 +222,17 @@ static int smu_v13_0_4_system_features_c + struct amdgpu_device *adev = smu->adev; + int ret = 0; + +- if (!en && adev->in_s4) { +- /* Adds a GFX reset as workaround just before sending the +- * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering +- * an invalid state. +- */ +- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, +- SMU_RESET_MODE_2, NULL); +- if (ret) +- return ret; ++ if (!en && !adev->in_s0ix) { ++ if (adev->in_s4) { ++ /* Adds a GFX reset as workaround just before sending the ++ * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering ++ * an invalid state. ++ */ ++ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, ++ SMU_RESET_MODE_2, NULL); ++ if (ret) ++ return ret; ++ } + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); + } diff --git a/queue-6.1/kvm-arm64-aarch32-fix-spurious-trapping-of-conditional-instructions.patch b/queue-6.1/kvm-arm64-aarch32-fix-spurious-trapping-of-conditional-instructions.patch new file mode 100644 index 00000000000..3f56f3862d1 --- /dev/null +++ b/queue-6.1/kvm-arm64-aarch32-fix-spurious-trapping-of-conditional-instructions.patch @@ -0,0 +1,65 @@ +From c92e8b9eacebb4060634ebd9395bba1b29aadc68 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Fri, 24 May 2024 15:19:56 +0100 +Subject: KVM: arm64: AArch32: Fix spurious trapping of conditional instructions + +From: Marc Zyngier + +commit c92e8b9eacebb4060634ebd9395bba1b29aadc68 upstream. + +We recently upgraded the view of ESR_EL2 to 64bit, in keeping with +the requirements of the architecture. + +However, the AArch32 emulation code was left unaudited, and the +(already dodgy) code that triages whether a trap is spurious or not +(because the condition code failed) broke in a subtle way: + +If ESR_EL2.ISS2 is ever non-zero (unlikely, but hey, this is the ARM +architecture we're talking about), the hack that tests the top bits +of ESR_EL2.EC will break in an interesting way. + +Instead, use kvm_vcpu_trap_get_class() to obtain the EC, and list +all the possible ECs that can fail a condition code check. + +While we're at it, add SMC32 to the list, as it is explicitly listed +as being allowed to trap despite failing a condition code check (as +described in the HCR_EL2.TSC documentation). 
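+
+A stand-alone illustration of the failure mode (user-space sketch with
+a made-up ISS2 value, not kernel code):
+
+	#include <stdio.h>
+	#include <stdint.h>
+
+	#define ESR_ELx_EC_SHIFT	26
+	#define ESR_ELx_EC(esr)		(((esr) >> ESR_ELx_EC_SHIFT) & 0x3f)
+
+	int main(void)
+	{
+		/* EC = 0x07 (FP/ASIMD access): a conditional trap */
+		uint64_t esr = 0x07ULL << ESR_ELx_EC_SHIFT;
+
+		/* old check: "top two bits non-zero -> unconditional" */
+		printf("old: %d\n", !!(esr >> 30));	/* 0: condition is checked */
+
+		esr |= 1ULL << 32;			/* any ISS2 bit set */
+		printf("old: %d\n", !!(esr >> 30));	/* 1: wrongly "unconditional" */
+		printf("EC:  %#x\n", (unsigned)ESR_ELx_EC(esr));	/* still 0x7 */
+		return 0;
+	}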
+
+Fixes: 0b12620fddb8 ("KVM: arm64: Treat ESR_EL2 as a 64-bit register")
+Cc: stable@vger.kernel.org
+Acked-by: Oliver Upton
+Link: https://lore.kernel.org/r/20240524141956.1450304-4-maz@kernel.org
+Signed-off-by: Marc Zyngier
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm64/kvm/hyp/aarch32.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/kvm/hyp/aarch32.c
++++ b/arch/arm64/kvm/hyp/aarch32.c
+@@ -50,9 +50,23 @@ bool kvm_condition_valid32(const struct
+ 	u32 cpsr_cond;
+ 	int cond;
+ 
+-	/* Top two bits non-zero? Unconditional. */
+-	if (kvm_vcpu_get_esr(vcpu) >> 30)
++	/*
++	 * These are the exception classes that could fire with a
++	 * conditional instruction.
++	 */
++	switch (kvm_vcpu_trap_get_class(vcpu)) {
++	case ESR_ELx_EC_CP15_32:
++	case ESR_ELx_EC_CP15_64:
++	case ESR_ELx_EC_CP14_MR:
++	case ESR_ELx_EC_CP14_LS:
++	case ESR_ELx_EC_FP_ASIMD:
++	case ESR_ELx_EC_CP10_ID:
++	case ESR_ELx_EC_CP14_64:
++	case ESR_ELx_EC_SMC32:
++		break;
++	default:
+ 		return true;
++	}
+ 
+ 	/* Is condition field valid? */
+ 	cond = kvm_vcpu_get_condition(vcpu);
diff --git a/queue-6.1/kvm-arm64-allow-aarch32-pstate.m-to-be-restored-as-system-mode.patch b/queue-6.1/kvm-arm64-allow-aarch32-pstate.m-to-be-restored-as-system-mode.patch
new file mode 100644
index 00000000000..6e8b21db581
--- /dev/null
+++ b/queue-6.1/kvm-arm64-allow-aarch32-pstate.m-to-be-restored-as-system-mode.patch
@@ -0,0 +1,34 @@
+From dfe6d190f38fc5df5ff2614b463a5195a399c885 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier
+Date: Fri, 24 May 2024 15:19:55 +0100
+Subject: KVM: arm64: Allow AArch32 PSTATE.M to be restored as System mode
+
+From: Marc Zyngier
+
+commit dfe6d190f38fc5df5ff2614b463a5195a399c885 upstream.
+
+It appears that we don't allow a vcpu to be restored in AArch32
+System mode, as we *never* included it in the list of valid modes.
+
+Just add it to the list of allowed modes.
+
+Fixes: 0d854a60b1d7 ("arm64: KVM: enable initialization of a 32bit vcpu")
+Cc: stable@vger.kernel.org
+Acked-by: Oliver Upton
+Link: https://lore.kernel.org/r/20240524141956.1450304-3-maz@kernel.org
+Signed-off-by: Marc Zyngier
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm64/kvm/guest.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm64/kvm/guest.c
++++ b/arch/arm64/kvm/guest.c
+@@ -250,6 +250,7 @@ static int set_core_reg(struct kvm_vcpu
+ 	case PSR_AA32_MODE_SVC:
+ 	case PSR_AA32_MODE_ABT:
+ 	case PSR_AA32_MODE_UND:
++	case PSR_AA32_MODE_SYS:
+ 		if (!vcpu_el1_is_32bit(vcpu))
+ 			return -EINVAL;
+ 		break;
diff --git a/queue-6.1/kvm-arm64-fix-aarch32-register-narrowing-on-userspace-write.patch b/queue-6.1/kvm-arm64-fix-aarch32-register-narrowing-on-userspace-write.patch
new file mode 100644
index 00000000000..f42bcf4e70f
--- /dev/null
+++ b/queue-6.1/kvm-arm64-fix-aarch32-register-narrowing-on-userspace-write.patch
@@ -0,0 +1,49 @@
+From 947051e361d551e0590777080ffc4926190f62f2 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier
+Date: Fri, 24 May 2024 15:19:54 +0100
+Subject: KVM: arm64: Fix AArch32 register narrowing on userspace write
+
+From: Marc Zyngier
+
+commit 947051e361d551e0590777080ffc4926190f62f2 upstream.
+
+When userspace writes to one of the core registers, we make
+sure to narrow the corresponding GPRs if PSTATE indicates
+an AArch32 context.
+
+The code tries to check whether the context is EL0 or EL1 so
+that it narrows the correct registers. But it does so by checking
+the full PSTATE instead of PSTATE.M.
+
+As a consequence, if we are restoring an AArch32 EL0 context
+in a 64bit guest, and PSTATE has *any* bit set outside of
+PSTATE.M, we narrow *all* registers instead of only the first 15,
+destroying the 64bit state.
+
+Obviously, this is not something the guest is likely to enjoy.
+
+Correctly masking PSTATE to only evaluate PSTATE.M fixes it.
+
+Fixes: 90c1f934ed71 ("KVM: arm64: Get rid of the AArch32 register mapping code")
+Reported-by: Nina Schoetterl-Glausch
+Cc: stable@vger.kernel.org
+Reviewed-by: Nina Schoetterl-Glausch
+Acked-by: Oliver Upton
+Link: https://lore.kernel.org/r/20240524141956.1450304-2-maz@kernel.org
+Signed-off-by: Marc Zyngier
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm64/kvm/guest.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/kvm/guest.c
++++ b/arch/arm64/kvm/guest.c
+@@ -270,7 +270,7 @@ static int set_core_reg(struct kvm_vcpu
+ 	if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
+ 		int i, nr_reg;
+ 
+-		switch (*vcpu_cpsr(vcpu)) {
++		switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) {
+ 		/*
+ 		 * Either we are dealing with user mode, and only the
+ 		 * first 15 registers (+ PC) must be narrowed to 32bit.
diff --git a/queue-6.1/mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch b/queue-6.1/mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch
new file mode 100644
index 00000000000..cb40ffab627
--- /dev/null
+++ b/queue-6.1/mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch
@@ -0,0 +1,225 @@
+From 3a5a8d343e1cf96eb9971b17cbd4b832ab19b8e7 Mon Sep 17 00:00:00 2001
+From: Ryan Roberts
+Date: Wed, 1 May 2024 15:33:10 +0100
+Subject: mm: fix race between __split_huge_pmd_locked() and GUP-fast
+
+From: Ryan Roberts
+
+commit 3a5a8d343e1cf96eb9971b17cbd4b832ab19b8e7 upstream.
+
+__split_huge_pmd_locked() can be called for a present THP, devmap or
+(non-present) migration entry. It calls pmdp_invalidate() unconditionally
+on the pmdp and only determines if it is present or not based on the
+returned old pmd. This is a problem for the migration entry case because
+pmd_mkinvalid(), called by pmdp_invalidate(), must only be called for a
+present pmd.
+
+On arm64 at least, pmd_mkinvalid() will mark the pmd such that any future
+call to pmd_present() will return true. And therefore any lockless
+pgtable walker could see the migration entry pmd in this state and start
+interpreting the fields as if it were present, leading to BadThings (TM).
+GUP-fast appears to be one such lockless pgtable walker.
+
+x86 does not suffer the above problem, but instead pmd_mkinvalid() will
+corrupt the offset field of the swap entry within the swap pte. See link
+below for discussion of that problem.
+
+Fix all of this by only calling pmdp_invalidate() for a present pmd. And
+for good measure let's add a warning to all implementations of
+pmdp_invalidate[_ad](). I've manually reviewed all other
+pmdp_invalidate[_ad]() call sites and believe all others to be conformant.
+
+This is a theoretical bug found during code review. I don't have any test
+case to trigger it in practice.
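+
+The call-site rule in __split_huge_pmd_locked(), reduced to a sketch
+(see the full hunk below):
+
+	pmd_t old_pmd;
+
+	if (is_pmd_migration_entry(*pmd)) {
+		/* Non-present: must not go through pmd_mkinvalid(). */
+		old_pmd = *pmd;
+	} else {
+		/* Present THP/devmap: invalidate before the split. */
+		old_pmd = pmdp_invalidate(vma, haddr, pmd);
+	}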
+ +Link: https://lkml.kernel.org/r/20240501143310.1381675-1-ryan.roberts@arm.com +Link: https://lore.kernel.org/all/0dd7827a-6334-439a-8fd0-43c98e6af22b@arm.com/ +Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path") +Signed-off-by: Ryan Roberts +Reviewed-by: Zi Yan +Reviewed-by: Anshuman Khandual +Acked-by: David Hildenbrand +Cc: Andreas Larsson +Cc: Andy Lutomirski +Cc: Aneesh Kumar K.V +Cc: Borislav Petkov (AMD) +Cc: Catalin Marinas +Cc: Christian Borntraeger +Cc: Christophe Leroy +Cc: Dave Hansen +Cc: "David S. Miller" +Cc: Ingo Molnar +Cc: Jonathan Corbet +Cc: Mark Rutland +Cc: Naveen N. Rao +Cc: Nicholas Piggin +Cc: Peter Zijlstra +Cc: Sven Schnelle +Cc: Thomas Gleixner +Cc: Will Deacon +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/mm/arch_pgtable_helpers.rst | 6 ++- + arch/powerpc/mm/book3s64/pgtable.c | 1 + arch/s390/include/asm/pgtable.h | 4 +- + arch/sparc/mm/tlb.c | 1 + arch/x86/mm/pgtable.c | 2 + + mm/huge_memory.c | 49 +++++++++++++++--------------- + mm/pgtable-generic.c | 2 + + 7 files changed, 39 insertions(+), 26 deletions(-) + +--- a/Documentation/mm/arch_pgtable_helpers.rst ++++ b/Documentation/mm/arch_pgtable_helpers.rst +@@ -136,7 +136,8 @@ PMD Page Table Helpers + +---------------------------+--------------------------------------------------+ + | pmd_swp_clear_soft_dirty | Clears a soft dirty swapped PMD | + +---------------------------+--------------------------------------------------+ +-| pmd_mkinvalid | Invalidates a mapped PMD [1] | ++| pmd_mkinvalid | Invalidates a present PMD; do not call for | ++| | non-present PMD [1] | + +---------------------------+--------------------------------------------------+ + | pmd_set_huge | Creates a PMD huge mapping | + +---------------------------+--------------------------------------------------+ +@@ -192,7 +193,8 @@ PUD Page Table Helpers + +---------------------------+--------------------------------------------------+ + | pud_mkdevmap | Creates a ZONE_DEVICE mapped PUD | + +---------------------------+--------------------------------------------------+ +-| pud_mkinvalid | Invalidates a mapped PUD [1] | ++| pud_mkinvalid | Invalidates a present PUD; do not call for | ++| | non-present PUD [1] | + +---------------------------+--------------------------------------------------+ + | pud_set_huge | Creates a PUD huge mapping | + +---------------------------+--------------------------------------------------+ +--- a/arch/powerpc/mm/book3s64/pgtable.c ++++ b/arch/powerpc/mm/book3s64/pgtable.c +@@ -124,6 +124,7 @@ pmd_t pmdp_invalidate(struct vm_area_str + { + unsigned long old_pmd; + ++ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); + old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + return __pmd(old_pmd); +--- a/arch/s390/include/asm/pgtable.h ++++ b/arch/s390/include/asm/pgtable.h +@@ -1686,8 +1686,10 @@ static inline pmd_t pmdp_huge_clear_flus + static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) + { +- pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); ++ pmd_t pmd; + ++ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); ++ pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); + return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd); + } + +--- a/arch/sparc/mm/tlb.c ++++ b/arch/sparc/mm/tlb.c +@@ -245,6 +245,7 @@ pmd_t pmdp_invalidate(struct vm_area_str + { + pmd_t old, entry; + ++ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); + entry = 
__pmd(pmd_val(*pmdp) & ~_PAGE_VALID); + old = pmdp_establish(vma, address, pmdp, entry); + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); +--- a/arch/x86/mm/pgtable.c ++++ b/arch/x86/mm/pgtable.c +@@ -615,6 +615,8 @@ int pmdp_clear_flush_young(struct vm_are + pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) + { ++ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); ++ + /* + * No flush is necessary. Once an invalid PTE is established, the PTE's + * access and dirty bits cannot be updated. +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2108,32 +2108,11 @@ static void __split_huge_pmd_locked(stru + return __split_huge_zero_page_pmd(vma, haddr, pmd); + } + +- /* +- * Up to this point the pmd is present and huge and userland has the +- * whole access to the hugepage during the split (which happens in +- * place). If we overwrite the pmd with the not-huge version pointing +- * to the pte here (which of course we could if all CPUs were bug +- * free), userland could trigger a small page size TLB miss on the +- * small sized TLB while the hugepage TLB entry is still established in +- * the huge TLB. Some CPU doesn't like that. +- * See http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf, Erratum +- * 383 on page 105. Intel should be safe but is also warns that it's +- * only safe if the permission and cache attributes of the two entries +- * loaded in the two TLB is identical (which should be the case here). +- * But it is generally safer to never allow small and huge TLB entries +- * for the same virtual address to be loaded simultaneously. So instead +- * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the +- * current pmd notpresent (atomically because here the pmd_trans_huge +- * must remain set at all times on the pmd until the split is complete +- * for this pmd), then we flush the SMP TLB and finally we write the +- * non-huge version of the pmd entry with pmd_populate. +- */ +- old_pmd = pmdp_invalidate(vma, haddr, pmd); +- +- pmd_migration = is_pmd_migration_entry(old_pmd); ++ pmd_migration = is_pmd_migration_entry(*pmd); + if (unlikely(pmd_migration)) { + swp_entry_t entry; + ++ old_pmd = *pmd; + entry = pmd_to_swp_entry(old_pmd); + page = pfn_swap_entry_to_page(entry); + write = is_writable_migration_entry(entry); +@@ -2144,6 +2123,30 @@ static void __split_huge_pmd_locked(stru + soft_dirty = pmd_swp_soft_dirty(old_pmd); + uffd_wp = pmd_swp_uffd_wp(old_pmd); + } else { ++ /* ++ * Up to this point the pmd is present and huge and userland has ++ * the whole access to the hugepage during the split (which ++ * happens in place). If we overwrite the pmd with the not-huge ++ * version pointing to the pte here (which of course we could if ++ * all CPUs were bug free), userland could trigger a small page ++ * size TLB miss on the small sized TLB while the hugepage TLB ++ * entry is still established in the huge TLB. Some CPU doesn't ++ * like that. See ++ * http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf, Erratum ++ * 383 on page 105. Intel should be safe but is also warns that ++ * it's only safe if the permission and cache attributes of the ++ * two entries loaded in the two TLB is identical (which should ++ * be the case here). But it is generally safer to never allow ++ * small and huge TLB entries for the same virtual address to be ++ * loaded simultaneously. 
So instead of doing "pmd_populate(); ++ * flush_pmd_tlb_range();" we first mark the current pmd ++ * notpresent (atomically because here the pmd_trans_huge must ++ * remain set at all times on the pmd until the split is ++ * complete for this pmd), then we flush the SMP TLB and finally ++ * we write the non-huge version of the pmd entry with ++ * pmd_populate. ++ */ ++ old_pmd = pmdp_invalidate(vma, haddr, pmd); + page = pmd_page(old_pmd); + if (pmd_dirty(old_pmd)) { + dirty = true; +--- a/mm/pgtable-generic.c ++++ b/mm/pgtable-generic.c +@@ -195,6 +195,7 @@ pgtable_t pgtable_trans_huge_withdraw(st + pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) + { ++ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); + pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + return old; +@@ -205,6 +206,7 @@ pmd_t pmdp_invalidate(struct vm_area_str + pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) + { ++ VM_WARN_ON_ONCE(!pmd_present(*pmdp)); + return pmdp_invalidate(vma, address, pmdp); + } + #endif diff --git a/queue-6.1/series b/queue-6.1/series index 81cbf55d7ae..583025b2e7a 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -43,3 +43,12 @@ mmc-sdhci-acpi-disable-write-protect-detection-on-toshiba-wt10-a.patch mmc-sdhci-acpi-add-quirk-to-enable-pull-up-on-the-card-detect-gpio-on-asus-t100ta.patch fbdev-savage-handle-err-return-when-savagefb_check_var-failed.patch drm-amdgpu-atomfirmware-add-intergrated-info-v2.3-table.patch +9p-add-missing-locking-around-taking-dentry-fid-list.patch +drm-amd-fix-shutdown-again-on-some-smu-v13.0.4-11-platforms.patch +kvm-arm64-fix-aarch32-register-narrowing-on-userspace-write.patch +kvm-arm64-allow-aarch32-pstate.m-to-be-restored-as-system-mode.patch +kvm-arm64-aarch32-fix-spurious-trapping-of-conditional-instructions.patch +crypto-ecdsa-fix-module-auto-load-on-add-key.patch +crypto-ecrdsa-fix-module-auto-load-on-add_key.patch +crypto-qat-fix-adf_dev_reset_sync-memory-leak.patch +mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch