From: Dave Airlie <airlied@redhat.com>
Date: Fri, 4 Jul 2025 00:06:22 +0000 (+1000)
Subject: Merge tag 'amd-drm-next-6.17-2025-07-01' of https://gitlab.freedesktop.org/agd5f... 
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7e2818386aad54ba5ab70e228c555814d33bdad1;p=thirdparty%2Flinux.git

Merge tag 'amd-drm-next-6.17-2025-07-01' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.17-2025-07-01:

amdgpu:
- FAMS2 fixes
- OLED fixes
- Misc cleanups
- AUX fixes
- DMCUB updates
- SR-IOV hibernation support
- RAS updates
- DP tunneling fixes
- DML2 fixes
- Backlight improvements
- Suspend improvements
- Use scaling for non-native modes on eDP
- SDMA 4.4.x fixes
- PCIe DPM fixes
- SDMA 5.x fixes
- Cleaner shader updates for GC 9.x
- Remove fence slab
- ISP genpd support
- Partition handling rework
- SDMA FW checks for userq support
- Add missing firmware declaration
- Fix leak in amdgpu_ctx_mgr_entity_fini()
- Freesync fix
- Ring reset refactoring
- Legacy dpm verbosity changes

amdkfd:
- GWS fix
- mtype fix for ext coherent system memory
- MMU notifier fix
- gfx7/8 fix

radeon:
- CS validation support for additional GL extensions
- Bump driver version for new CS validation checks

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://lore.kernel.org/r/20250701194707.32905-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
---

7e2818386aad54ba5ab70e228c555814d33bdad1
diff --cc drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 8cecf25996edc,3d170060282ee..2c3547f4cea42
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@@ -322,11 -283,13 +283,13 @@@ bool amdgpu_fence_process(struct amdgpu
   */
  static void amdgpu_fence_fallback(struct timer_list *t)
  {
 -	struct amdgpu_ring *ring = from_timer(ring, t,
 -					      fence_drv.fallback_timer);
 +	struct amdgpu_ring *ring = timer_container_of(ring, t,
 +						      fence_drv.fallback_timer);
  
  	if (amdgpu_fence_process(ring))
- 		DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
+ 		dev_warn(ring->adev->dev,
+ 			 "Fence fallback timer expired on ring %s\n",
+ 			 ring->name);
  }
  
  /**
diff --cc drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 13c60cac4261b,cf988077a3eef..183fa33c24347
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@@ -729,10 -733,10 +733,10 @@@ static const struct irq_domain_ops amdg
   */
  int amdgpu_irq_add_domain(struct amdgpu_device *adev)
  {
 -	adev->irq.domain = irq_domain_add_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
 -						 &amdgpu_hw_irqdomain_ops, adev);
 +	adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
 +						    &amdgpu_hw_irqdomain_ops, adev);
  	if (!adev->irq.domain) {
- 		DRM_ERROR("GPU irq add domain failed\n");
+ 		dev_err(adev->dev, "GPU irq add domain failed\n");
  		return -ENODEV;
  	}
  
diff --cc drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1e24590ae1449,f0b7080dccb8d..2b58e353cca15
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@@ -89,11 -89,9 +89,10 @@@ static enum drm_gpu_sched_stat amdgpu_j
  {
  	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  	struct amdgpu_job *job = to_amdgpu_job(s_job);
 +	struct drm_wedge_task_info *info = NULL;
  	struct amdgpu_task_info *ti;
  	struct amdgpu_device *adev = ring->adev;
- 	int idx;
- 	int r;
+ 	int idx, r;
  
  	if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
  		dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s",
@@@ -133,47 -133,22 +132,24 @@@
  	if (unlikely(adev->debug_disable_gpu_ring_reset)) {
  		dev_err(adev->dev, "Ring reset disabled by debug mask\n");
  	} else if (amdgpu_gpu_recovery && ring->funcs->reset) {
- 		bool is_guilty;
- 
- 		dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name);
- 		/* stop the scheduler, but don't mess with the
- 		 * bad job yet because if ring reset fails
- 		 * we'll fall back to full GPU reset.
- 		 */
- 		drm_sched_wqueue_stop(&ring->sched);
- 
- 		/* for engine resets, we need to reset the engine,
- 		 * but individual queues may be unaffected.
- 		 * check here to make sure the accounting is correct.
- 		 */
- 		if (ring->funcs->is_guilty)
- 			is_guilty = ring->funcs->is_guilty(ring);
- 		else
- 			is_guilty = true;
- 
- 		if (is_guilty)
- 			dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
- 
- 		r = amdgpu_ring_reset(ring, job->vmid);
+ 		dev_err(adev->dev, "Starting %s ring reset\n",
+ 			s_job->sched->name);
+ 		r = amdgpu_ring_reset(ring, job->vmid, NULL);
  		if (!r) {
- 			if (amdgpu_ring_sched_ready(ring))
- 				drm_sched_stop(&ring->sched, s_job);
- 			if (is_guilty) {
- 				atomic_inc(&ring->adev->gpu_reset_counter);
- 				amdgpu_fence_driver_force_completion(ring);
- 			}
- 			if (amdgpu_ring_sched_ready(ring))
- 				drm_sched_start(&ring->sched, 0);
- 			dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name);
- 			drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, info);
+ 			atomic_inc(&ring->adev->gpu_reset_counter);
+ 			dev_err(adev->dev, "Ring %s reset succeeded\n",
+ 				ring->sched.name);
+ 			drm_dev_wedged_event(adev_to_drm(adev),
 -					     DRM_WEDGE_RECOVERY_NONE);
++					     DRM_WEDGE_RECOVERY_NONE, info);
  			goto exit;
  		}
- 		dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name);
+ 		dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
  	}
+ 
  	dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
  
 +	amdgpu_vm_put_task_info(ti);
 +
  	if (amdgpu_device_should_recover_gpu(ring->adev)) {
  		struct amdgpu_reset_context reset_context;
  		memset(&reset_context, 0, sizeof(reset_context));