git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
drm/amdgpu: use atomic functions with memory barriers for vm fault info
author Gui-Dong Han <hanguidong02@gmail.com>
Tue, 21 Oct 2025 13:34:32 +0000 (09:34 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 29 Oct 2025 13:01:24 +0000 (14:01 +0100)
[ Upstream commit 6df8e84aa6b5b1812cc2cacd6b3f5ccbb18cda2b ]

The atomic variable vm_fault_info_updated is used to synchronize access to
adev->gmc.vm_fault_info between the interrupt handler and
get_vm_fault_info().

The default atomic functions like atomic_set() and atomic_read() do not
provide memory barriers. This allows for CPU instruction reordering,
meaning the memory accesses to vm_fault_info and the vm_fault_info_updated
flag are not guaranteed to occur in the intended order. This creates a
race condition that can lead to inconsistent or stale data being used.

The previous implementation, which used an explicit mb(), was incomplete
and inefficient. It failed to account for all potential CPU reorderings,
such as the access of vm_fault_info being reordered before the atomic_read
of the flag. This approach is also more verbose and less performant than
using the proper atomic functions with acquire/release semantics.

Fix this by switching to atomic_set_release() and atomic_read_acquire().
These functions provide the necessary acquire and release semantics,
which act as memory barriers to ensure the correct order of operations.
They are also more efficient and idiomatic than explicit full memory
barriers.

Fixes: b97dfa27ef3a ("drm/amdgpu: save vm fault information for amdkfd")
Cc: stable@vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02@gmail.com>
Signed-off-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
[ kept kgd_dev parameter and adev cast in amdgpu_amdkfd_gpuvm_get_vm_fault_info ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

index 4a95a624fca7b2f8e24319d44c7f3219147f6e21..53efc07cf42431f2b10b8b3e72a20c0364fa4cf9 100644 (file)
@@ -1640,10 +1640,9 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
        struct amdgpu_device *adev;
 
        adev = (struct amdgpu_device *)kgd;
-       if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
+       if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) {
                *mem = *adev->gmc.vm_fault_info;
-               mb();
-               atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+               atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
        }
        return 0;
 }
index 80c146df338aaea3fc994a8f8f30f7c1519cffef..a5e78036ae457c6a494c67be600b1a782dcff63a 100644 (file)
@@ -1067,7 +1067,7 @@ static int gmc_v7_0_sw_init(void *handle)
                                        GFP_KERNEL);
        if (!adev->gmc.vm_fault_info)
                return -ENOMEM;
-       atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+       atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
 
        return 0;
 }
@@ -1297,7 +1297,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
        vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
                             VMID);
        if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
-               && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+               && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
                struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
                u32 protections = REG_GET_FIELD(status,
                                        VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1313,8 +1313,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
                info->prot_read = protections & 0x8 ? true : false;
                info->prot_write = protections & 0x10 ? true : false;
                info->prot_exec = protections & 0x20 ? true : false;
-               mb();
-               atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+               atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
        }
 
        return 0;
index 873bc33912e23e74fee63e8404537dfb942a8dd3..8a8bbbb28dc15754ae2cdcce3b1e2056d24fddb3 100644 (file)
@@ -1199,7 +1199,7 @@ static int gmc_v8_0_sw_init(void *handle)
                                        GFP_KERNEL);
        if (!adev->gmc.vm_fault_info)
                return -ENOMEM;
-       atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+       atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
 
        return 0;
 }
@@ -1488,7 +1488,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
        vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
                             VMID);
        if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
-               && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+               && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
                struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
                u32 protections = REG_GET_FIELD(status,
                                        VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1504,8 +1504,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
                info->prot_read = protections & 0x8 ? true : false;
                info->prot_write = protections & 0x10 ? true : false;
                info->prot_exec = protections & 0x20 ? true : false;
-               mb();
-               atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+               atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
        }
 
        return 0;