From c2b8725518ac1c8f4ef2aaa3b952faae8a2e01b5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 12 Jul 2025 16:37:21 +0200 Subject: [PATCH] 6.15-stable patches added patches: drm-gem-acquire-references-on-gem-handles-for-framebuffers.patch drm-gem-fix-race-in-drm_gem_handle_create_tail.patch drm-nouveau-do-not-fail-module-init-on-debugfs-errors.patch drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch drm-xe-bmg-fix-compressed-vram-handling.patch revert-drm-xe-xe2-enable-indirect-ring-state-support-for-xe2.patch --- ...nces-on-gem-handles-for-framebuffers.patch | 227 ++++++++++++++++++ ...x-race-in-drm_gem_handle_create_tail.patch | 140 +++++++++++ ...t-fail-module-init-on-debugfs-errors.patch | 92 +++++++ ...ount-before-swapping-tail-spsc-queue.patch | 50 ++++ ...ndling-in-ttm_buffer_object_transfer.patch | 55 +++++ ...-xe-bmg-fix-compressed-vram-handling.patch | 52 ++++ ...-indirect-ring-state-support-for-xe2.patch | 44 ++++ queue-6.15/series | 7 + 8 files changed, 667 insertions(+) create mode 100644 queue-6.15/drm-gem-acquire-references-on-gem-handles-for-framebuffers.patch create mode 100644 queue-6.15/drm-gem-fix-race-in-drm_gem_handle_create_tail.patch create mode 100644 queue-6.15/drm-nouveau-do-not-fail-module-init-on-debugfs-errors.patch create mode 100644 queue-6.15/drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch create mode 100644 queue-6.15/drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch create mode 100644 queue-6.15/drm-xe-bmg-fix-compressed-vram-handling.patch create mode 100644 queue-6.15/revert-drm-xe-xe2-enable-indirect-ring-state-support-for-xe2.patch diff --git a/queue-6.15/drm-gem-acquire-references-on-gem-handles-for-framebuffers.patch b/queue-6.15/drm-gem-acquire-references-on-gem-handles-for-framebuffers.patch new file mode 100644 index 0000000000..b269c82b87 --- /dev/null +++ b/queue-6.15/drm-gem-acquire-references-on-gem-handles-for-framebuffers.patch @@ -0,0 +1,227 @@ +From 5307dce878d4126e1b375587318955bd019c3741 Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Mon, 30 Jun 2025 10:36:47 +0200 +Subject: drm/gem: Acquire references on GEM handles for framebuffers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Zimmermann + +commit 5307dce878d4126e1b375587318955bd019c3741 upstream. + +A GEM handle can be released while the GEM buffer object is attached +to a DRM framebuffer. This leads to the release of the dma-buf backing +the buffer object, if any. [1] Trying to use the framebuffer in further +mode-setting operations leads to a segmentation fault. Most easily +happens with driver that use shadow planes for vmap-ing the dma-buf +during a page flip. An example is shown below. + +[ 156.791968] ------------[ cut here ]------------ +[ 156.796830] WARNING: CPU: 2 PID: 2255 at drivers/dma-buf/dma-buf.c:1527 dma_buf_vmap+0x224/0x430 +[...] +[ 156.942028] RIP: 0010:dma_buf_vmap+0x224/0x430 +[ 157.043420] Call Trace: +[ 157.045898] +[ 157.048030] ? show_trace_log_lvl+0x1af/0x2c0 +[ 157.052436] ? show_trace_log_lvl+0x1af/0x2c0 +[ 157.056836] ? show_trace_log_lvl+0x1af/0x2c0 +[ 157.061253] ? drm_gem_shmem_vmap+0x74/0x710 +[ 157.065567] ? dma_buf_vmap+0x224/0x430 +[ 157.069446] ? __warn.cold+0x58/0xe4 +[ 157.073061] ? dma_buf_vmap+0x224/0x430 +[ 157.077111] ? report_bug+0x1dd/0x390 +[ 157.080842] ? handle_bug+0x5e/0xa0 +[ 157.084389] ? exc_invalid_op+0x14/0x50 +[ 157.088291] ? 
asm_exc_invalid_op+0x16/0x20 +[ 157.092548] ? dma_buf_vmap+0x224/0x430 +[ 157.096663] ? dma_resv_get_singleton+0x6d/0x230 +[ 157.101341] ? __pfx_dma_buf_vmap+0x10/0x10 +[ 157.105588] ? __pfx_dma_resv_get_singleton+0x10/0x10 +[ 157.110697] drm_gem_shmem_vmap+0x74/0x710 +[ 157.114866] drm_gem_vmap+0xa9/0x1b0 +[ 157.118763] drm_gem_vmap_unlocked+0x46/0xa0 +[ 157.123086] drm_gem_fb_vmap+0xab/0x300 +[ 157.126979] drm_atomic_helper_prepare_planes.part.0+0x487/0xb10 +[ 157.133032] ? lockdep_init_map_type+0x19d/0x880 +[ 157.137701] drm_atomic_helper_commit+0x13d/0x2e0 +[ 157.142671] ? drm_atomic_nonblocking_commit+0xa0/0x180 +[ 157.147988] drm_mode_atomic_ioctl+0x766/0xe40 +[...] +[ 157.346424] ---[ end trace 0000000000000000 ]--- + +Acquiring GEM handles for the framebuffer's GEM buffer objects prevents +this from happening. The framebuffer's cleanup later puts the handle +references. + +Commit 1a148af06000 ("drm/gem-shmem: Use dma_buf from GEM object +instance") triggers the segmentation fault easily by using the dma-buf +field more widely. The underlying issue with reference counting has +been present before. + +v2: +- acquire the handle instead of the BO (Christian) +- fix comment style (Christian) +- drop the Fixes tag (Christian) +- rename err_ gotos +- add missing Link tag + +Suggested-by: Christian König +Signed-off-by: Thomas Zimmermann +Link: https://elixir.bootlin.com/linux/v6.15/source/drivers/gpu/drm/drm_gem.c#L241 # [1] +Cc: Thomas Zimmermann +Cc: Anusha Srivatsa +Cc: Christian König +Cc: Maarten Lankhorst +Cc: Maxime Ripard +Cc: Sumit Semwal +Cc: "Christian König" +Cc: linux-media@vger.kernel.org +Cc: dri-devel@lists.freedesktop.org +Cc: linaro-mm-sig@lists.linaro.org +Cc: +Reviewed-by: Christian König +Link: https://lore.kernel.org/r/20250630084001.293053-1-tzimmermann@suse.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/drm_gem.c | 44 ++++++++++++++++++++++++--- + drivers/gpu/drm/drm_gem_framebuffer_helper.c | 16 +++++---- + drivers/gpu/drm/drm_internal.h | 2 + + 3 files changed, 51 insertions(+), 11 deletions(-) + +--- a/drivers/gpu/drm/drm_gem.c ++++ b/drivers/gpu/drm/drm_gem.c +@@ -212,6 +212,35 @@ void drm_gem_private_object_fini(struct + } + EXPORT_SYMBOL(drm_gem_private_object_fini); + ++static void drm_gem_object_handle_get(struct drm_gem_object *obj) ++{ ++ struct drm_device *dev = obj->dev; ++ ++ drm_WARN_ON(dev, !mutex_is_locked(&dev->object_name_lock)); ++ ++ if (obj->handle_count++ == 0) ++ drm_gem_object_get(obj); ++} ++ ++/** ++ * drm_gem_object_handle_get_unlocked - acquire reference on user-space handles ++ * @obj: GEM object ++ * ++ * Acquires a reference on the GEM buffer object's handle. Required ++ * to keep the GEM object alive. Call drm_gem_object_handle_put_unlocked() ++ * to release the reference. ++ */ ++void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj) ++{ ++ struct drm_device *dev = obj->dev; ++ ++ guard(mutex)(&dev->object_name_lock); ++ ++ drm_WARN_ON(dev, !obj->handle_count); /* first ref taken in create-tail helper */ ++ drm_gem_object_handle_get(obj); ++} ++EXPORT_SYMBOL(drm_gem_object_handle_get_unlocked); ++ + /** + * drm_gem_object_handle_free - release resources bound to userspace handles + * @obj: GEM object to clean up. 
+@@ -242,8 +271,14 @@ static void drm_gem_object_exported_dma_ + } + } + +-static void +-drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) ++/** ++ * drm_gem_object_handle_put_unlocked - releases reference on user-space handles ++ * @obj: GEM object ++ * ++ * Releases a reference on the GEM buffer object's handle. Possibly releases ++ * the GEM buffer object and associated dma-buf objects. ++ */ ++void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) + { + struct drm_device *dev = obj->dev; + bool final = false; +@@ -268,6 +303,7 @@ drm_gem_object_handle_put_unlocked(struc + if (final) + drm_gem_object_put(obj); + } ++EXPORT_SYMBOL(drm_gem_object_handle_put_unlocked); + + /* + * Called at device or object close to release the file's +@@ -389,8 +425,8 @@ drm_gem_handle_create_tail(struct drm_fi + int ret; + + WARN_ON(!mutex_is_locked(&dev->object_name_lock)); +- if (obj->handle_count++ == 0) +- drm_gem_object_get(obj); ++ ++ drm_gem_object_handle_get(obj); + + /* + * Get the user-visible handle using idr. Preload and perform +--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c ++++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c +@@ -99,7 +99,7 @@ void drm_gem_fb_destroy(struct drm_frame + unsigned int i; + + for (i = 0; i < fb->format->num_planes; i++) +- drm_gem_object_put(fb->obj[i]); ++ drm_gem_object_handle_put_unlocked(fb->obj[i]); + + drm_framebuffer_cleanup(fb); + kfree(fb); +@@ -182,8 +182,10 @@ int drm_gem_fb_init_with_funcs(struct dr + if (!objs[i]) { + drm_dbg_kms(dev, "Failed to lookup GEM object\n"); + ret = -ENOENT; +- goto err_gem_object_put; ++ goto err_gem_object_handle_put_unlocked; + } ++ drm_gem_object_handle_get_unlocked(objs[i]); ++ drm_gem_object_put(objs[i]); + + min_size = (height - 1) * mode_cmd->pitches[i] + + drm_format_info_min_pitch(info, i, width) +@@ -193,22 +195,22 @@ int drm_gem_fb_init_with_funcs(struct dr + drm_dbg_kms(dev, + "GEM object size (%zu) smaller than minimum size (%u) for plane %d\n", + objs[i]->size, min_size, i); +- drm_gem_object_put(objs[i]); ++ drm_gem_object_handle_put_unlocked(objs[i]); + ret = -EINVAL; +- goto err_gem_object_put; ++ goto err_gem_object_handle_put_unlocked; + } + } + + ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs); + if (ret) +- goto err_gem_object_put; ++ goto err_gem_object_handle_put_unlocked; + + return 0; + +-err_gem_object_put: ++err_gem_object_handle_put_unlocked: + while (i > 0) { + --i; +- drm_gem_object_put(objs[i]); ++ drm_gem_object_handle_put_unlocked(objs[i]); + } + return ret; + } +--- a/drivers/gpu/drm/drm_internal.h ++++ b/drivers/gpu/drm/drm_internal.h +@@ -161,6 +161,8 @@ void drm_sysfs_lease_event(struct drm_de + + /* drm_gem.c */ + int drm_gem_init(struct drm_device *dev); ++void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj); ++void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj); + int drm_gem_handle_create_tail(struct drm_file *file_priv, + struct drm_gem_object *obj, + u32 *handlep); diff --git a/queue-6.15/drm-gem-fix-race-in-drm_gem_handle_create_tail.patch b/queue-6.15/drm-gem-fix-race-in-drm_gem_handle_create_tail.patch new file mode 100644 index 0000000000..df89a5d79f --- /dev/null +++ b/queue-6.15/drm-gem-fix-race-in-drm_gem_handle_create_tail.patch @@ -0,0 +1,140 @@ +From bd46cece51a36ef088f22ef0416ac13b0a46d5b0 Mon Sep 17 00:00:00 2001 +From: Simona Vetter +Date: Mon, 7 Jul 2025 17:18:13 +0200 +Subject: drm/gem: Fix race in drm_gem_handle_create_tail() + +From: Simona Vetter + +commit 
bd46cece51a36ef088f22ef0416ac13b0a46d5b0 upstream.
+
+Object creation is a careful dance where we must guarantee that the
+object is fully constructed before it is visible to other threads, and
+GEM buffer objects are no exception.
+
+Final publishing happens by calling drm_gem_handle_create(). After
+that the only allowed thing to do is call drm_gem_object_put() because
+a concurrent call to the GEM_CLOSE ioctl with a correctly guessed id
+(which is trivial since we have a linear allocator) can already tear
+down the object again.
+
+Luckily most drivers get this right, the very few exceptions I've
+pinged the relevant maintainers for. Unfortunately we also need
+drm_gem_handle_create() when creating additional handles for an
+already existing object (e.g. GETFB ioctl or the various bo import
+ioctl), and hence we cannot have a drm_gem_handle_create_and_put() as
+the only exported function to stop these issues from happening.
+
+Now unfortunately the implementation of drm_gem_handle_create() isn't
+living up to standards: It does correctly finish object
+initialization at the global level, and hence is safe against a
+concurrent tear down. But it also sets up the file-private aspects of
+the handle, and that part goes wrong: We fully register the object in
+the drm_file.object_idr before calling drm_vma_node_allow() or
+obj->funcs->open, which opens up races against concurrent removal of
+that handle in drm_gem_handle_delete().
+
+Fix this with the usual two-stage approach of first reserving the
+handle id, and then only registering the object after we've completed
+the file-private setup.
+
+Jacek reported this with a testcase of concurrently calling GEM_CLOSE
+on a freshly-created object (which also destroys the object), but it
+should be possible to hit this with just additional handles created
+through import or GETFB without completely destroying the underlying
+object with the concurrent GEM_CLOSE ioctl calls.
+
+Note that the close-side of this race was fixed in f6cd7daecff5 ("drm:
+Release driver references to handle before making it available
+again"), which means a cool 9 years have passed until someone noticed
+that we need to make this symmetric or there are still gaps left :-/
+Without the 2-stage close approach we'd still have a race, therefore
+that's an integral part of this bugfix.
+
+More importantly, this means we can have NULL pointers behind an
+allocated id in our drm_file.object_idr. We need to check for that
+now:
+
+- drm_gem_handle_delete() checks for ERR_OR_NULL already
+
+- drm_gem.c:object_lookup() also checks for NULL
+
+- drm_gem_release() should never be called if there's another thread
+  still existing that could call into an IOCTL that creates a new
+  handle, so cannot race. For paranoia I added a NULL check to
+  drm_gem_object_release_handle() though.
+
+- most drivers (etnaviv, i915, msm) are fine because they use
+  idr_find(), which maps both ENOENT and NULL to NULL.
+
+- drivers using idr_for_each_entry() should also be fine, because
+  idr_get_next does filter out NULL entries and continues the
+  iteration.
+
+- The same holds for drm_show_memory_stats().
+ +v2: Use drm_WARN_ON (Thomas) + +Reported-by: Jacek Lawrynowicz +Tested-by: Jacek Lawrynowicz +Reviewed-by: Thomas Zimmermann +Cc: stable@vger.kernel.org +Cc: Jacek Lawrynowicz +Cc: Maarten Lankhorst +Cc: Maxime Ripard +Cc: Thomas Zimmermann +Cc: David Airlie +Cc: Simona Vetter +Signed-off-by: Simona Vetter +Signed-off-by: Simona Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/20250707151814.603897-1-simona.vetter@ffwll.ch +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/drm_gem.c | 10 +++++++++- + include/drm/drm_file.h | 3 +++ + 2 files changed, 12 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/drm_gem.c ++++ b/drivers/gpu/drm/drm_gem.c +@@ -315,6 +315,9 @@ drm_gem_object_release_handle(int id, vo + struct drm_file *file_priv = data; + struct drm_gem_object *obj = ptr; + ++ if (drm_WARN_ON(obj->dev, !data)) ++ return 0; ++ + if (obj->funcs->close) + obj->funcs->close(obj, file_priv); + +@@ -435,7 +438,7 @@ drm_gem_handle_create_tail(struct drm_fi + idr_preload(GFP_KERNEL); + spin_lock(&file_priv->table_lock); + +- ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); ++ ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT); + + spin_unlock(&file_priv->table_lock); + idr_preload_end(); +@@ -456,6 +459,11 @@ drm_gem_handle_create_tail(struct drm_fi + goto err_revoke; + } + ++ /* mirrors drm_gem_handle_delete to avoid races */ ++ spin_lock(&file_priv->table_lock); ++ obj = idr_replace(&file_priv->object_idr, obj, handle); ++ WARN_ON(obj != NULL); ++ spin_unlock(&file_priv->table_lock); + *handlep = handle; + return 0; + +--- a/include/drm/drm_file.h ++++ b/include/drm/drm_file.h +@@ -300,6 +300,9 @@ struct drm_file { + * + * Mapping of mm object handles to object pointers. Used by the GEM + * subsystem. Protected by @table_lock. ++ * ++ * Note that allocated entries might be NULL as a transient state when ++ * creating or deleting a handle. + */ + struct idr object_idr; + diff --git a/queue-6.15/drm-nouveau-do-not-fail-module-init-on-debugfs-errors.patch b/queue-6.15/drm-nouveau-do-not-fail-module-init-on-debugfs-errors.patch new file mode 100644 index 0000000000..1eef89d31b --- /dev/null +++ b/queue-6.15/drm-nouveau-do-not-fail-module-init-on-debugfs-errors.patch @@ -0,0 +1,92 @@ +From 78f88067d5c56d9aed69f27e238742841461cf67 Mon Sep 17 00:00:00 2001 +From: Aaron Thompson +Date: Thu, 3 Jul 2025 21:19:49 +0000 +Subject: drm/nouveau: Do not fail module init on debugfs errors + +From: Aaron Thompson + +commit 78f88067d5c56d9aed69f27e238742841461cf67 upstream. + +If CONFIG_DEBUG_FS is enabled, nouveau_drm_init() returns an error if it +fails to create the "nouveau" directory in debugfs. One case where that +will happen is when debugfs access is restricted by +CONFIG_DEBUG_FS_ALLOW_NONE or by the boot parameter debugfs=off, which +cause the debugfs APIs to return -EPERM. + +So just ignore errors from debugfs. Note that nouveau_debugfs_root may +be an error now, but that is a standard pattern for debugfs. From +include/linux/debugfs.h: + +"NOTE: it's expected that most callers should _ignore_ the errors +returned by this function. Other debugfs functions handle the fact that +the "dentry" passed to them could be an error and they don't crash in +that case. Drivers should generally work fine even if debugfs fails to +init anyway." 
+ +Fixes: 97118a1816d2 ("drm/nouveau: create module debugfs root") +Cc: stable@vger.kernel.org +Signed-off-by: Aaron Thompson +Acked-by: Timur Tabi +Signed-off-by: Danilo Krummrich +Link: https://lore.kernel.org/r/20250703211949.9916-1-dev@aaront.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/nouveau/nouveau_debugfs.c | 6 +----- + drivers/gpu/drm/nouveau/nouveau_debugfs.h | 5 ++--- + drivers/gpu/drm/nouveau/nouveau_drm.c | 4 +--- + 3 files changed, 4 insertions(+), 11 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c ++++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c +@@ -314,14 +314,10 @@ nouveau_debugfs_fini(struct nouveau_drm + drm->debugfs = NULL; + } + +-int ++void + nouveau_module_debugfs_init(void) + { + nouveau_debugfs_root = debugfs_create_dir("nouveau", NULL); +- if (IS_ERR(nouveau_debugfs_root)) +- return PTR_ERR(nouveau_debugfs_root); +- +- return 0; + } + + void +--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h ++++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h +@@ -24,7 +24,7 @@ extern void nouveau_debugfs_fini(struct + + extern struct dentry *nouveau_debugfs_root; + +-int nouveau_module_debugfs_init(void); ++void nouveau_module_debugfs_init(void); + void nouveau_module_debugfs_fini(void); + #else + static inline void +@@ -42,10 +42,9 @@ nouveau_debugfs_fini(struct nouveau_drm + { + } + +-static inline int ++static inline void + nouveau_module_debugfs_init(void) + { +- return 0; + } + + static inline void +--- a/drivers/gpu/drm/nouveau/nouveau_drm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_drm.c +@@ -1456,9 +1456,7 @@ nouveau_drm_init(void) + if (!nouveau_modeset) + return 0; + +- ret = nouveau_module_debugfs_init(); +- if (ret) +- return ret; ++ nouveau_module_debugfs_init(); + + #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER + platform_driver_register(&nouveau_platform_driver); diff --git a/queue-6.15/drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch b/queue-6.15/drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch new file mode 100644 index 0000000000..36ea90986e --- /dev/null +++ b/queue-6.15/drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch @@ -0,0 +1,50 @@ +From 8af39ec5cf2be522c8eb43a3d8005ed59e4daaee Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Fri, 13 Jun 2025 14:20:13 -0700 +Subject: drm/sched: Increment job count before swapping tail spsc queue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Matthew Brost + +commit 8af39ec5cf2be522c8eb43a3d8005ed59e4daaee upstream. + +A small race exists between spsc_queue_push and the run-job worker, in +which spsc_queue_push may return not-first while the run-job worker has +already idled due to the job count being zero. If this race occurs, job +scheduling stops, leading to hangs while waiting on the job’s DMA +fences. + +Seal this race by incrementing the job count before appending to the +SPSC queue. + +This race was observed on a drm-tip 6.16-rc1 build with the Xe driver in +an SVM test case. 
+ +Fixes: 1b1f42d8fde4 ("drm: move amd_gpu_scheduler into common location") +Fixes: 27105db6c63a ("drm/amdgpu: Add SPSC queue to scheduler.") +Cc: stable@vger.kernel.org +Signed-off-by: Matthew Brost +Reviewed-by: Jonathan Cavitt +Link: https://lore.kernel.org/r/20250613212013.719312-1-matthew.brost@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + include/drm/spsc_queue.h | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/include/drm/spsc_queue.h ++++ b/include/drm/spsc_queue.h +@@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struc + + preempt_disable(); + ++ atomic_inc(&queue->job_count); ++ smp_mb__after_atomic(); ++ + tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next); + WRITE_ONCE(*tail, node); +- atomic_inc(&queue->job_count); + + /* + * In case of first element verify new node will be visible to the consumer diff --git a/queue-6.15/drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch b/queue-6.15/drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch new file mode 100644 index 0000000000..2e5ce30d84 --- /dev/null +++ b/queue-6.15/drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch @@ -0,0 +1,55 @@ +From 97e000acf2e20a86a50a0ec8c2739f0846f37509 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= +Date: Fri, 13 Jun 2025 13:16:38 +0200 +Subject: drm/ttm: fix error handling in ttm_buffer_object_transfer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian König + +commit 97e000acf2e20a86a50a0ec8c2739f0846f37509 upstream. + +Unlocking the resv object was missing in the error path, additionally to +that we should move over the resource only after the fence slot was +reserved. + +Signed-off-by: Christian König +Reviewed-by: Matthew Brost +Fixes: c8d4c18bfbc4a ("dma-buf/drivers: make reserving a shared slot mandatory v4") +Cc: +Link: https://lore.kernel.org/r/20250616130726.22863-3-christian.koenig@amd.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/ttm/ttm_bo_util.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/ttm/ttm_bo_util.c ++++ b/drivers/gpu/drm/ttm/ttm_bo_util.c +@@ -254,6 +254,13 @@ static int ttm_buffer_object_transfer(st + ret = dma_resv_trylock(&fbo->base.base._resv); + WARN_ON(!ret); + ++ ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); ++ if (ret) { ++ dma_resv_unlock(&fbo->base.base._resv); ++ kfree(fbo); ++ return ret; ++ } ++ + if (fbo->base.resource) { + ttm_resource_set_bo(fbo->base.resource, &fbo->base); + bo->resource = NULL; +@@ -262,12 +269,6 @@ static int ttm_buffer_object_transfer(st + fbo->base.bulk_move = NULL; + } + +- ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); +- if (ret) { +- kfree(fbo); +- return ret; +- } +- + ttm_bo_get(bo); + fbo->bo = bo; + diff --git a/queue-6.15/drm-xe-bmg-fix-compressed-vram-handling.patch b/queue-6.15/drm-xe-bmg-fix-compressed-vram-handling.patch new file mode 100644 index 0000000000..b6f9bd3b8c --- /dev/null +++ b/queue-6.15/drm-xe-bmg-fix-compressed-vram-handling.patch @@ -0,0 +1,52 @@ +From fee58ca135a7b979c8b75e6d2eac60d695f9209b Mon Sep 17 00:00:00 2001 +From: Matthew Auld +Date: Tue, 1 Jul 2025 11:39:50 +0100 +Subject: drm/xe/bmg: fix compressed VRAM handling +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Matthew Auld + +commit fee58ca135a7b979c8b75e6d2eac60d695f9209b upstream. 
+ +There looks to be an issue in our compression handling when the BO pages +are very fragmented, where we choose to skip the identity map and +instead fall back to emitting the PTEs by hand when migrating memory, +such that we can hopefully do more work per blit operation. However in +such a case we need to ensure the src PTEs are correctly tagged with a +compression enabled PAT index on dgpu xe2+, otherwise the copy will +simply treat the src memory as uncompressed, leading to corruption if +the memory was compressed by the user. + +To fix this pass along use_comp_pat into emit_pte() on the src side, to +indicate that compression should be considered. + +v2 (Jonathan): tweak the commit message + +Fixes: 523f191cc0c7 ("drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx") +Signed-off-by: Matthew Auld +Cc: Himal Prasad Ghimiray +Cc: Thomas Hellström +Cc: Akshata Jahagirdar +Cc: # v6.12+ +Reviewed-by: Jonathan Cavitt +Link: https://lore.kernel.org/r/20250701103949.83116-2-matthew.auld@intel.com +(cherry picked from commit f7a2fd776e57bd6468644bdecd91ab3aba57ba58) +Signed-off-by: Lucas De Marchi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/xe_migrate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/xe/xe_migrate.c ++++ b/drivers/gpu/drm/xe/xe_migrate.c +@@ -860,7 +860,7 @@ struct dma_fence *xe_migrate_copy(struct + if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) + xe_res_next(&src_it, src_L0); + else +- emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, ++ emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, + &src_it, src_L0, src); + + if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) diff --git a/queue-6.15/revert-drm-xe-xe2-enable-indirect-ring-state-support-for-xe2.patch b/queue-6.15/revert-drm-xe-xe2-enable-indirect-ring-state-support-for-xe2.patch new file mode 100644 index 0000000000..d35b581ba6 --- /dev/null +++ b/queue-6.15/revert-drm-xe-xe2-enable-indirect-ring-state-support-for-xe2.patch @@ -0,0 +1,44 @@ +From daa099fed50a39256feb37d3fac146bf0d74152f Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Tue, 1 Jul 2025 20:58:46 -0700 +Subject: Revert "drm/xe/xe2: Enable Indirect Ring State support for Xe2" + +From: Matthew Brost + +commit daa099fed50a39256feb37d3fac146bf0d74152f upstream. + +This reverts commit fe0154cf8222d9e38c60ccc124adb2f9b5272371. + +Seeing some unexplained random failures during LRC context switches with +indirect ring state enabled. The failures were always there, but the +repro rate increased with the addition of WA BB as a separate BO. +Commit 3a1edef8f4b5 ("drm/xe: Make WA BB part of LRC BO") helped to +reduce the issues in the context switches, but didn't eliminate them +completely. + +Indirect ring state is not required for any current features, so disable +for now until failures can be root caused. 
+ +Cc: stable@vger.kernel.org +Fixes: fe0154cf8222 ("drm/xe/xe2: Enable Indirect Ring State support for Xe2") +Signed-off-by: Matthew Brost +Reviewed-by: Lucas De Marchi +Link: https://lore.kernel.org/r/20250702035846.3178344-1-matthew.brost@intel.com +Signed-off-by: Lucas De Marchi +(cherry picked from commit 03d85ab36bcbcbe9dc962fccd3f8e54d7bb93b35) +Signed-off-by: Lucas De Marchi +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/xe/xe_pci.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/gpu/drm/xe/xe_pci.c ++++ b/drivers/gpu/drm/xe/xe_pci.c +@@ -137,7 +137,6 @@ static const struct xe_graphics_desc gra + .has_asid = 1, \ + .has_atomic_enable_pte_bit = 1, \ + .has_flat_ccs = 1, \ +- .has_indirect_ring_state = 1, \ + .has_range_tlb_invalidation = 1, \ + .has_usm = 1, \ + .has_64bit_timestamp = 1, \ diff --git a/queue-6.15/series b/queue-6.15/series index 121c537cc6..a6d5cb6845 100644 --- a/queue-6.15/series +++ b/queue-6.15/series @@ -82,3 +82,10 @@ drm-imagination-fix-kernel-crash-when-hard-resetting-the-gpu.patch drm-amdkfd-don-t-call-mmput-from-mmu-notifier-callback.patch drm-amdgpu-include-sdma_4_4_4.bin.patch drm-amdkfd-add-hqd_sdma_get_doorbell-callbacks-for-gfx7-8.patch +drm-gem-acquire-references-on-gem-handles-for-framebuffers.patch +drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch +drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch +drm-gem-fix-race-in-drm_gem_handle_create_tail.patch +drm-xe-bmg-fix-compressed-vram-handling.patch +revert-drm-xe-xe2-enable-indirect-ring-state-support-for-xe2.patch +drm-nouveau-do-not-fail-module-init-on-debugfs-errors.patch -- 2.47.2
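
A note on the two GEM fixes queued above: both hinge on publication
order, i.e. an object or handle may only become visible to other
threads once it is fully set up, and lookups must tolerate a
transiently empty slot. The following is a minimal userspace C sketch
of that two-stage pattern, loosely modelled on
drm_gem_handle_create_tail(); the names (handle_create, handle_lookup,
table_lock) and the fixed-size array are illustrative stand-ins for the
kernel's idr and spinlock, not the actual DRM code:

/*
 * Userspace analogy, not kernel code: publish a handle in two stages.
 * Stage 1 reserves an id under the lock but stores NULL, so a concurrent
 * lookup or close sees "no object yet" instead of a half-built handle.
 * Stage 2 publishes the pointer only after per-handle setup is done.
 */
#include <pthread.h>
#include <stdio.h>

#define MAX_HANDLES 64

struct object {
	int refcount;
};

static struct object *table[MAX_HANDLES];	/* NULL = free or not yet published */
static int reserved[MAX_HANDLES];		/* nonzero = id handed out */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static int handle_create(struct object *obj)
{
	int handle = -1;

	/* Stage 1: reserve an id, publish nothing. */
	pthread_mutex_lock(&table_lock);
	for (int i = 0; i < MAX_HANDLES; i++) {
		if (!reserved[i]) {
			reserved[i] = 1;
			table[i] = NULL;	/* transient NULL state */
			handle = i;
			break;
		}
	}
	pthread_mutex_unlock(&table_lock);
	if (handle < 0)
		return -1;

	/* ...per-handle setup would run here, outside the lock... */

	/* Stage 2: publish the fully constructed object. */
	pthread_mutex_lock(&table_lock);
	table[handle] = obj;
	pthread_mutex_unlock(&table_lock);

	return handle;
}

static struct object *handle_lookup(int handle)
{
	struct object *obj = NULL;

	pthread_mutex_lock(&table_lock);
	/* A reserved but unpublished id reads as NULL; callers must cope. */
	if (handle >= 0 && handle < MAX_HANDLES)
		obj = table[handle];
	pthread_mutex_unlock(&table_lock);

	return obj;
}

int main(void)
{
	struct object obj = { .refcount = 1 };
	int h = handle_create(&obj);

	printf("handle %d -> %p\n", h, (void *)handle_lookup(h));
	return 0;
}

The kernel fix follows the same shape with idr_alloc(..., NULL, ...)
followed by idr_replace(), which is why the object_idr documentation in
drm_file.h now spells out that entries can be NULL while a handle is
being created or deleted.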
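
The drm/sched change is about the producer side of the same kind of
ordering: the job count has to be incremented before the node becomes
reachable through the tail swap, otherwise the run-job worker can see
the node but still read a count of zero and go idle. Below is a rough,
single-threaded C11 sketch of that push ordering; seq_cst atomics stand
in for the kernel's atomic_inc()/smp_mb__after_atomic(), the names are
invented, and the pop slow path is only an approximate port, so treat
it as an illustration rather than a drop-in queue:

/*
 * C11 analogy of the SPSC push-ordering fix, not the kernel spsc_queue.
 * The pending-job counter is bumped *before* the tail exchange publishes
 * the node, so an observer of the node can never read a stale count of 0.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	_Atomic(struct node *) next;
	int payload;
};

struct spsc_queue {
	_Atomic(struct node *) head;
	_Atomic(_Atomic(struct node *) *) tail;	/* slot the next push links into */
	atomic_int job_count;
};

static void spsc_init(struct spsc_queue *q)
{
	atomic_store(&q->head, NULL);
	atomic_store(&q->tail, &q->head);
	atomic_store(&q->job_count, 0);
}

/* Returns true if the queue was empty, i.e. the consumer may need a wake-up. */
static bool spsc_push(struct spsc_queue *q, struct node *n)
{
	_Atomic(struct node *) *tail;

	atomic_store(&n->next, NULL);

	/* Count the job before it becomes reachable (the ordering fix). */
	atomic_fetch_add(&q->job_count, 1);

	tail = atomic_exchange(&q->tail, &n->next);
	atomic_store(tail, n);

	return tail == &q->head;
}

static struct node *spsc_pop(struct spsc_queue *q)
{
	struct node *node = atomic_load(&q->head);
	struct node *next;

	if (!node)
		return NULL;

	next = atomic_load(&node->next);
	atomic_store(&q->head, next);

	if (!next) {
		/* Last element: try to fold the tail back onto head. */
		_Atomic(struct node *) *expected = &node->next;

		if (!atomic_compare_exchange_strong(&q->tail, &expected, &q->head)) {
			/* A concurrent push won; wait for it to link its node. */
			while (!(next = atomic_load(&node->next)))
				;
			atomic_store(&q->head, next);
		}
	}

	atomic_fetch_sub(&q->job_count, 1);
	return node;
}

int main(void)
{
	struct spsc_queue q;
	struct node jobs[3];

	spsc_init(&q);
	for (int i = 0; i < 3; i++) {
		jobs[i].payload = i;
		spsc_push(&q, &jobs[i]);
	}
	printf("pending jobs: %d\n", atomic_load(&q.job_count));

	for (struct node *n; (n = spsc_pop(&q));)
		printf("ran job %d\n", n->payload);

	return 0;
}

In the queued kernel patch the same effect is achieved by moving
atomic_inc(&queue->job_count) plus smp_mb__after_atomic() ahead of the
atomic_long_xchg() on the tail.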