]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: block CE CS if not explicitly allowed by module option
author Christian König <christian.koenig@amd.com>
Mon, 22 Sep 2025 12:18:16 +0000 (14:18 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 13 Oct 2025 18:14:14 +0000 (14:14 -0400)
The Constant Engine found on gfx6-gfx10 HW has been a notorious source of
problems.

RADV never used it in the first place, radeonsi only used it for a few
releases around 2017 for gfx6-gfx9 before dropping support for it as
well.

While investigating another problem, I recently found that submitting
to the CE seems to have been completely broken on gfx9 for quite a while.

Since nobody complained about that problem it most likely means that
nobody is using any of the affected radeonsi versions on current Linux
kernels any more.

So, to potentially phase out support for the CE and eliminate another
source of problems, block the submission of CE IBs unless it is explicitly
re-enabled using a debug flag.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 2a0df4cabb99ab1ff53dfe8c1c837cdfe793f35b..6f5b4a0e0a343f67c309c5e24e5b9928399792ac 100644 (file)
@@ -1290,6 +1290,7 @@ struct amdgpu_device {
        bool                            debug_disable_gpu_ring_reset;
        bool                            debug_vm_userptr;
        bool                            debug_disable_ce_logs;
+       bool                            debug_enable_ce_cs;
 
        /* Protection for the following isolation structure */
        struct mutex                    enforce_isolation_mutex;
index 9cd7741d22545f26f48fce5739970727c7bb352a..ba9fb08db0947194c3c8d392394dfa04113286c8 100644 (file)
@@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
        if (p->uf_bo && ring->funcs->no_user_fence)
                return -EINVAL;
 
+       if (!p->adev->debug_enable_ce_cs &&
+           chunk_ib->flags & AMDGPU_IB_FLAG_CE) {
+               dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");
+               return -EINVAL;
+       }
+
        if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
            chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
                if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
index bff25ef3e2d042a763f4638549a96cdcc126327c..61268aa82df4d6ce3ea9118575c7935b48595d7e 100644 (file)
@@ -144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK {
        AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
        AMDGPU_DEBUG_SMU_POOL = BIT(7),
        AMDGPU_DEBUG_VM_USERPTR = BIT(8),
-       AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
+       AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9),
+       AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10)
 };
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2289,6 +2290,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
                pr_info("debug: disable kernel logs of correctable errors\n");
                adev->debug_disable_ce_logs = true;
        }
+
+       if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) {
+               pr_info("debug: allowing command submission to CE engine\n");
+               adev->debug_enable_ce_cs = true;
+       }
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)