git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdkfd: fixed page fault when enable MES shader debugger
author: Jesse.zhang@amd.com <Jesse.zhang@amd.com>
Wed, 18 Dec 2024 10:23:52 +0000 (18:23 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 6 Jan 2025 20:13:37 +0000 (15:13 -0500)
Initialize the process context address before setting the shader debugger.

[  260.781212] amdgpu 0000:03:00.0: amdgpu: [gfxhub] page fault (src_id:0 ring:32 vmid:0 pasid:0)
[  260.781236] amdgpu 0000:03:00.0: amdgpu:   in page starting at address 0x0000000000000000 from client 10
[  260.781255] amdgpu 0000:03:00.0: amdgpu: GCVM_L2_PROTECTION_FAULT_STATUS:0x00040A40
[  260.781270] amdgpu 0000:03:00.0: amdgpu:      Faulty UTCL2 client ID: CPC (0x5)
[  260.781284] amdgpu 0000:03:00.0: amdgpu:      MORE_FAULTS: 0x0
[  260.781296] amdgpu 0000:03:00.0: amdgpu:      WALKER_ERROR: 0x0
[  260.781308] amdgpu 0000:03:00.0: amdgpu:      PERMISSION_FAULTS: 0x4
[  260.781320] amdgpu 0000:03:00.0: amdgpu:      MAPPING_ERROR: 0x0
[  260.781332] amdgpu 0000:03:00.0: amdgpu:      RW: 0x1
[  260.782017] amdgpu 0000:03:00.0: amdgpu: [gfxhub] page fault (src_id:0 ring:32 vmid:0 pasid:0)
[  260.782039] amdgpu 0000:03:00.0: amdgpu:   in page starting at address 0x0000000000000000 from client 10
[  260.782058] amdgpu 0000:03:00.0: amdgpu: GCVM_L2_PROTECTION_FAULT_STATUS:0x00040A41
[  260.782073] amdgpu 0000:03:00.0: amdgpu:      Faulty UTCL2 client ID: CPC (0x5)
[  260.782087] amdgpu 0000:03:00.0: amdgpu:      MORE_FAULTS: 0x1
[  260.782098] amdgpu 0000:03:00.0: amdgpu:      WALKER_ERROR: 0x0
[  260.782110] amdgpu 0000:03:00.0: amdgpu:      PERMISSION_FAULTS: 0x4
[  260.782122] amdgpu 0000:03:00.0: amdgpu:      MAPPING_ERROR: 0x0
[  260.782137] amdgpu 0000:03:00.0: amdgpu:      RW: 0x1
[  260.782155] amdgpu 0000:03:00.0: amdgpu: [gfxhub] page fault (src_id:0 ring:32 vmid:0 pasid:0)
[  260.782166] amdgpu 0000:03:00.0: amdgpu:   in page starting at address 0x0000000000000000 from client 10

Fixes: 438b39ac74e2 ("drm/amdkfd: pause autosuspend when creating pdd")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3849
Signed-off-by: Jesse Zhang <jesse.zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 5b231f5bc9ff02ec5737f2ec95cdf15ac95088e9)
Cc: stable@vger.kernel.org
drivers/gpu/drm/amd/amdkfd/kfd_debug.c

index 312dfa84f29f84c2362e80e976a436f69c07da1a..a8abc309180137da6f00f7561ec33f27f4937ffc 100644 (file)
@@ -350,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
 {
        uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
        uint32_t flags = pdd->process->dbg_flags;
+       struct amdgpu_device *adev = pdd->dev->adev;
+       int r;
 
        if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
                return 0;
 
+       if (!pdd->proc_ctx_cpu_ptr) {
+                       r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+                               AMDGPU_MES_PROC_CTX_SIZE,
+                               &pdd->proc_ctx_bo,
+                               &pdd->proc_ctx_gpu_addr,
+                               &pdd->proc_ctx_cpu_ptr,
+                               false);
+               if (r) {
+                       dev_err(adev->dev,
+                       "failed to allocate process context bo\n");
+                       return r;
+               }
+               memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+       }
+
        return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
                                                pdd->watch_points, flags, sq_trap_en);
 }