git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: implement retry fault handling for Navi
author: Christian König <christian.koenig@amd.com>
Tue, 3 Nov 2020 13:40:57 +0000 (14:40 +0100)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 24 Nov 2020 17:07:01 +0000 (12:07 -0500)
Same as gmc9, basically filter the fault, reroute or handle it.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c

index cffc3ca8fcde18aa9d388506a23e2f81053f543e..4f6e44e21691d95d9f03b907ba268f2a09e88814 100644 (file)
@@ -94,6 +94,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
                                       struct amdgpu_irq_src *source,
                                       struct amdgpu_iv_entry *entry)
 {
+       bool retry_fault = !!(entry->src_data[1] & 0x80);
        struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
        struct amdgpu_task_info task_info;
        uint32_t status = 0;
@@ -102,6 +103,30 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
        addr = (u64)entry->src_data[0] << 12;
        addr |= ((u64)entry->src_data[1] & 0xf) << 44;
 
+       if (retry_fault) {
+               /* Returning 1 here also prevents sending the IV to the KFD */
+
+               /* Process it only if it's the first fault for this address */
+               if (entry->ih != &adev->irq.ih_soft &&
+                   amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
+                                            entry->timestamp))
+                       return 1;
+
+               /* Delegate it to a different ring if the hardware hasn't
+                * already done it.
+                */
+               if (in_interrupt()) {
+                       amdgpu_irq_delegate(adev, entry, 8);
+                       return 1;
+               }
+
+               /* Try to handle the recoverable page faults by filling page
+                * tables
+                */
+               if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+                       return 1;
+       }
+
        if (!amdgpu_sriov_vf(adev)) {
                /*
                 * Issue a dummy read to wait for the status register to