git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu/mes: keep enforce isolation up to date
author: Alex Deucher <alexander.deucher@amd.com>
Fri, 14 Feb 2025 17:32:30 +0000 (12:32 -0500)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 25 Feb 2025 17:23:48 +0000 (12:23 -0500)
Re-send the mes message on resume to make sure the
mes state is up to date.

Fixes: 8521e3c5f058 ("drm/amd/amdgpu: limit single process inside MES")
Acked-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Shaoyun Liu <shaoyun.liu@amd.com>
Cc: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 27b791514789844e80da990c456c2465325e0851)

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c

index c6aff3ddb42d770ef0908184d64c3a02d875f10c..c1f35ded684e81548ebb1f05281c21bdfc7a7349 100644 (file)
@@ -1638,24 +1638,19 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
        }
 
        mutex_lock(&adev->enforce_isolation_mutex);
-
        for (i = 0; i < num_partitions; i++) {
-               if (adev->enforce_isolation[i] && !partition_values[i]) {
+               if (adev->enforce_isolation[i] && !partition_values[i])
                        /* Going from enabled to disabled */
                        amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
-                       if (adev->enable_mes && adev->gfx.enable_cleaner_shader)
-                               amdgpu_mes_set_enforce_isolation(adev, i, false);
-               } else if (!adev->enforce_isolation[i] && partition_values[i]) {
+               else if (!adev->enforce_isolation[i] && partition_values[i])
                        /* Going from disabled to enabled */
                        amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
-                       if (adev->enable_mes && adev->gfx.enable_cleaner_shader)
-                               amdgpu_mes_set_enforce_isolation(adev, i, true);
-               }
                adev->enforce_isolation[i] = partition_values[i];
        }
-
        mutex_unlock(&adev->enforce_isolation_mutex);
 
+       amdgpu_mes_update_enforce_isolation(adev);
+
        return count;
 }
 
index 32b27a1658e7823c4813301977c403cfb738b111..709c11cbeabd885549078871ba4b132cba8d6288 100644 (file)
@@ -1681,7 +1681,8 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
 }
 
 /* Fix me -- node_id is used to identify the correct MES instances in the future */
-int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable)
+static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev,
+                                           uint32_t node_id, bool enable)
 {
        struct mes_misc_op_input op_input = {0};
        int r;
@@ -1703,6 +1704,23 @@ error:
        return r;
 }
 
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev)
+{
+       int i, r = 0;
+
+       if (adev->enable_mes && adev->gfx.enable_cleaner_shader) {
+               mutex_lock(&adev->enforce_isolation_mutex);
+               for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+                       if (adev->enforce_isolation[i])
+                               r |= amdgpu_mes_set_enforce_isolation(adev, i, true);
+                       else
+                               r |= amdgpu_mes_set_enforce_isolation(adev, i, false);
+               }
+               mutex_unlock(&adev->enforce_isolation_mutex);
+       }
+       return r;
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
index 2df2444ee892cd1d8cac7efc97c886e57ed28db8..e98ea7ede1bab20930fd1617fa83ebdf749f90c3 100644 (file)
@@ -534,6 +534,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
 
 bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
 
-int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable);
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);
 
 #endif /* __AMDGPU_MES_H__ */
index 65f389eb65e5fa4e2d2319f312ab1ee4e83a6d66..f9a4d08eef92591bd36ec10e2a12ab6e458bd1a8 100644 (file)
@@ -1633,6 +1633,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
                goto failure;
        }
 
+       r = amdgpu_mes_update_enforce_isolation(adev);
+       if (r)
+               goto failure;
+
 out:
        /*
         * Disable KIQ ring usage from the driver once MES is enabled.
index 901e924e69ad94813c64abccfcc3d0101132c641..0fd0fa6ed51843b3bd280171190fa0832d0f51dd 100644 (file)
@@ -1743,6 +1743,10 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
                goto failure;
        }
 
+       r = amdgpu_mes_update_enforce_isolation(adev);
+       if (r)
+               goto failure;
+
 out:
        /*
         * Disable KIQ ring usage from the driver once MES is enabled.