Merge tag 'amd-drm-next-6.17-2025-07-01' of https://gitlab.freedesktop.org/agd5f...
author	Dave Airlie <airlied@redhat.com>
Fri, 4 Jul 2025 00:06:22 +0000 (10:06 +1000)
committer	Dave Airlie <airlied@redhat.com>
Fri, 4 Jul 2025 00:06:29 +0000 (10:06 +1000)
amd-drm-next-6.17-2025-07-01:

amdgpu:
- FAMS2 fixes
- OLED fixes
- Misc cleanups
- AUX fixes
- DMCUB updates
- SR-IOV hibernation support
- RAS updates
- DP tunneling fixes
- DML2 fixes
- Backlight improvements
- Suspend improvements
- Use scaling for non-native modes on eDP
- SDMA 4.4.x fixes
- PCIe DPM fixes
- SDMA 5.x fixes
- Cleaner shader updates for GC 9.x
- Remove fence slab
- ISP genpd support
- Partition handling rework
- SDMA FW checks for userq support
- Add missing firmware declaration
- Fix leak in amdgpu_ctx_mgr_entity_fini()
- Freesync fix
- Ring reset refactoring
- Legacy dpm verbosity changes

amdkfd:
- GWS fix
- mtype fix for ext coherent system memory
- MMU notifier fix
- gfx7/8 fix

radeon:
- CS validation support for additional GL extensions
- Bump driver version for new CS validation checks

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://lore.kernel.org/r/20250701194707.32905-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c

index 8cecf25996edc19146fa851c9abae14ac9965011,3d170060282eeab83de3ebcae823e14ac45af3ec..2c3547f4cea420c94dffc60d6931fd80888bae35
@@@ -322,11 -283,13 +283,13 @@@ bool amdgpu_fence_process(struct amdgpu
   */
  static void amdgpu_fence_fallback(struct timer_list *t)
  {
 -      struct amdgpu_ring *ring = from_timer(ring, t,
 -                                            fence_drv.fallback_timer);
 +      struct amdgpu_ring *ring = timer_container_of(ring, t,
 +                                                    fence_drv.fallback_timer);
  
        if (amdgpu_fence_process(ring))
-               DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
+               dev_warn(ring->adev->dev,
+                        "Fence fallback timer expired on ring %s\n",
+                        ring->name);
  }
  
  /**
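Note: the from_timer() -> timer_container_of() change above is the kernel-wide
rename of the same container_of-based helper; the semantics are unchanged. A
minimal sketch of the pattern outside amdgpu (hypothetical my_dev structure and
callback name, not driver code):

	#include <linux/timer.h>

	struct my_dev {
		struct timer_list fallback_timer;	/* embedded timer */
	};

	static void my_dev_timeout(struct timer_list *t)
	{
		/* timer_container_of() resolves the enclosing structure from
		 * the timer_list pointer, exactly as the old from_timer() did.
		 */
		struct my_dev *dev = timer_container_of(dev, t, fallback_timer);

		/* ... handle the expired fallback timer using dev ... */
	}
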
index 13c60cac4261bacb8f9da5bcb18b139b6fd8eee9,cf988077a3eefc28631cf49df351d601b126ef03..183fa33c243479426f932c42c27f677f02c6e4bd
@@@ -729,10 -733,10 +733,10 @@@ static const struct irq_domain_ops amdg
   */
  int amdgpu_irq_add_domain(struct amdgpu_device *adev)
  {
 -      adev->irq.domain = irq_domain_add_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
 -                                               &amdgpu_hw_irqdomain_ops, adev);
 +      adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
 +                                                  &amdgpu_hw_irqdomain_ops, adev);
        if (!adev->irq.domain) {
-               DRM_ERROR("GPU irq add domain failed\n");
+               dev_err(adev->dev, "GPU irq add domain failed\n");
                return -ENODEV;
        }
  
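Note: irq_domain_add_linear() took a struct device_node *, while its
replacement irq_domain_create_linear() takes a struct fwnode_handle *; passing
NULL, as above, keeps the domain anonymous. A minimal sketch of the new call
(hypothetical ops and domain size, not the amdgpu code):

	#include <linux/irq.h>
	#include <linux/irqdomain.h>

	static int my_irq_map(struct irq_domain *d, unsigned int virq,
			      irq_hw_number_t hwirq)
	{
		irq_set_chip_and_handler(virq, &dummy_irq_chip,
					 handle_simple_irq);
		return 0;
	}

	static const struct irq_domain_ops my_irqdomain_ops = {
		.map = my_irq_map,
	};

	static struct irq_domain *my_create_domain(void *host_data)
	{
		/* NULL fwnode: no firmware node backs this domain */
		return irq_domain_create_linear(NULL, 64, &my_irqdomain_ops,
						host_data);
	}
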
index 1e24590ae1449f49e4632fbf2b931e04c03af8d5,f0b7080dccb8d455a1957bc59b6beeb327ca8639..2b58e353cca154223ee5314f0285cc1f805430f6
@@@ -89,11 -89,9 +89,10 @@@ static enum drm_gpu_sched_stat amdgpu_j
  {
        struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
        struct amdgpu_job *job = to_amdgpu_job(s_job);
 +      struct drm_wedge_task_info *info = NULL;
        struct amdgpu_task_info *ti;
        struct amdgpu_device *adev = ring->adev;
-       int idx;
-       int r;
+       int idx, r;
  
        if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
                dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s",
        if (unlikely(adev->debug_disable_gpu_ring_reset)) {
                dev_err(adev->dev, "Ring reset disabled by debug mask\n");
        } else if (amdgpu_gpu_recovery && ring->funcs->reset) {
-               bool is_guilty;
-               dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name);
-               /* stop the scheduler, but don't mess with the
-                * bad job yet because if ring reset fails
-                * we'll fall back to full GPU reset.
-                */
-               drm_sched_wqueue_stop(&ring->sched);
-               /* for engine resets, we need to reset the engine,
-                * but individual queues may be unaffected.
-                * check here to make sure the accounting is correct.
-                */
-               if (ring->funcs->is_guilty)
-                       is_guilty = ring->funcs->is_guilty(ring);
-               else
-                       is_guilty = true;
-               if (is_guilty)
-                       dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
-               r = amdgpu_ring_reset(ring, job->vmid);
+               dev_err(adev->dev, "Starting %s ring reset\n",
+                       s_job->sched->name);
+               r = amdgpu_ring_reset(ring, job->vmid, NULL);
                if (!r) {
-                       if (amdgpu_ring_sched_ready(ring))
-                               drm_sched_stop(&ring->sched, s_job);
-                       if (is_guilty) {
-                               atomic_inc(&ring->adev->gpu_reset_counter);
-                               amdgpu_fence_driver_force_completion(ring);
-                       }
-                       if (amdgpu_ring_sched_ready(ring))
-                               drm_sched_start(&ring->sched, 0);
-                       dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name);
-                       drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, info);
+                       atomic_inc(&ring->adev->gpu_reset_counter);
+                       dev_err(adev->dev, "Ring %s reset succeeded\n",
+                               ring->sched.name);
+                       drm_dev_wedged_event(adev_to_drm(adev),
 -                                           DRM_WEDGE_RECOVERY_NONE);
++                                           DRM_WEDGE_RECOVERY_NONE, info);
                        goto exit;
                }
-               dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name);
+               dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
        }
        dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
  
 +      amdgpu_vm_put_task_info(ti);
 +
        if (amdgpu_device_should_recover_gpu(ring->adev)) {
                struct amdgpu_reset_context reset_context;
                memset(&reset_context, 0, sizeof(reset_context));
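Note: the timeout-handler hunk above moves scheduler stop/start and per-queue
guilt accounting into amdgpu_ring_reset(), so the caller only bumps the reset
counter and reports the recovery. A condensed sketch of the resulting control
flow (hypothetical wrapper name; only calls that appear in the diff above):

	static void my_handle_ring_timeout(struct amdgpu_ring *ring,
					   struct drm_sched_job *s_job,
					   struct amdgpu_job *job,
					   struct drm_wedge_task_info *info)
	{
		struct amdgpu_device *adev = ring->adev;
		int r;

		/* amdgpu_ring_reset() now owns scheduler stop/start and the
		 * is_guilty() bookkeeping that used to live here */
		r = amdgpu_ring_reset(ring, job->vmid, NULL);
		if (!r) {
			atomic_inc(&adev->gpu_reset_counter);
			/* recovery finished without userspace involvement */
			drm_dev_wedged_event(adev_to_drm(adev),
					     DRM_WEDGE_RECOVERY_NONE, info);
			return;
		}

		/* ring reset failed: mark the job and fall back to full
		 * GPU reset */
		dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
	}
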
Simple merge