]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Jul 2025 14:36:56 +0000 (16:36 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Jul 2025 14:36:56 +0000 (16:36 +0200)
added patches:
drm-gem-fix-race-in-drm_gem_handle_create_tail.patch
drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch
drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch

queue-6.1/drm-gem-fix-race-in-drm_gem_handle_create_tail.patch [new file with mode: 0644]
queue-6.1/drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch [new file with mode: 0644]
queue-6.1/drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch [new file with mode: 0644]
queue-6.1/series

diff --git a/queue-6.1/drm-gem-fix-race-in-drm_gem_handle_create_tail.patch b/queue-6.1/drm-gem-fix-race-in-drm_gem_handle_create_tail.patch
new file mode 100644 (file)
index 0000000..5c8db29
--- /dev/null
@@ -0,0 +1,140 @@
+From bd46cece51a36ef088f22ef0416ac13b0a46d5b0 Mon Sep 17 00:00:00 2001
+From: Simona Vetter <simona.vetter@ffwll.ch>
+Date: Mon, 7 Jul 2025 17:18:13 +0200
+Subject: drm/gem: Fix race in drm_gem_handle_create_tail()
+
+From: Simona Vetter <simona.vetter@ffwll.ch>
+
+commit bd46cece51a36ef088f22ef0416ac13b0a46d5b0 upstream.
+
+Object creation is a careful dance where we must guarantee that the
+object is fully constructed before it is visible to other threads, and
+GEM buffer objects are no difference.
+
+Final publishing happens by calling drm_gem_handle_create(). After
+that the only allowed thing to do is call drm_gem_object_put() because
+a concurrent call to the GEM_CLOSE ioctl with a correctly guessed id
+(which is trivial since we have a linear allocator) can already tear
+down the object again.
+
+Luckily most drivers get this right, the very few exceptions I've
+pinged the relevant maintainers for. Unfortunately we also need
+drm_gem_handle_create() when creating additional handles for an
+already existing object (e.g. GETFB ioctl or the various bo import
+ioctl), and hence we cannot have a drm_gem_handle_create_and_put() as
+the only exported function to stop these issues from happening.
+
+Now unfortunately the implementation of drm_gem_handle_create() isn't
+living up to standards: It does correctly finishe object
+initialization at the global level, and hence is safe against a
+concurrent tear down. But it also sets up the file-private aspects of
+the handle, and that part goes wrong: We fully register the object in
+the drm_file.object_idr before calling drm_vma_node_allow() or
+obj->funcs->open, which opens up races against concurrent removal of
+that handle in drm_gem_handle_delete().
+
+Fix this with the usual two-stage approach of first reserving the
+handle id, and then only registering the object after we've completed
+the file-private setup.
+
+Jacek reported this with a testcase of concurrently calling GEM_CLOSE
+on a freshly-created object (which also destroys the object), but it
+should be possible to hit this with just additional handles created
+through import or GETFB without completed destroying the underlying
+object with the concurrent GEM_CLOSE ioctl calls.
+
+Note that the close-side of this race was fixed in f6cd7daecff5 ("drm:
+Release driver references to handle before making it available
+again"), which means a cool 9 years have passed until someone noticed
+that we need to make this symmetry or there's still gaps left :-/
+Without the 2-stage close approach we'd still have a race, therefore
+that's an integral part of this bugfix.
+
+More importantly, this means we can have NULL pointers behind
+allocated id in our drm_file.object_idr. We need to check for that
+now:
+
+- drm_gem_handle_delete() checks for ERR_OR_NULL already
+
+- drm_gem.c:object_lookup() also chekcs for NULL
+
+- drm_gem_release() should never be called if there's another thread
+  still existing that could call into an IOCTL that creates a new
+  handle, so cannot race. For paranoia I added a NULL check to
+  drm_gem_object_release_handle() though.
+
+- most drivers (etnaviv, i915, msm) are find because they use
+  idr_find(), which maps both ENOENT and NULL to NULL.
+
+- drivers using idr_for_each_entry() should also be fine, because
+  idr_get_next does filter out NULL entries and continues the
+  iteration.
+
+- The same holds for drm_show_memory_stats().
+
+v2: Use drm_WARN_ON (Thomas)
+
+Reported-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
+Tested-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
+Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
+Cc: stable@vger.kernel.org
+Cc: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
+Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Cc: Maxime Ripard <mripard@kernel.org>
+Cc: Thomas Zimmermann <tzimmermann@suse.de>
+Cc: David Airlie <airlied@gmail.com>
+Cc: Simona Vetter <simona@ffwll.ch>
+Signed-off-by: Simona Vetter <simona.vetter@intel.com>
+Signed-off-by: Simona Vetter <simona.vetter@ffwll.ch>
+Link: https://patchwork.freedesktop.org/patch/msgid/20250707151814.603897-1-simona.vetter@ffwll.ch
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/drm_gem.c |   10 +++++++++-
+ include/drm/drm_file.h    |    3 +++
+ 2 files changed, 12 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/drm_gem.c
++++ b/drivers/gpu/drm/drm_gem.c
+@@ -236,6 +236,9 @@ drm_gem_object_release_handle(int id, vo
+       struct drm_file *file_priv = data;
+       struct drm_gem_object *obj = ptr;
++      if (drm_WARN_ON(obj->dev, !data))
++              return 0;
++
+       if (obj->funcs->close)
+               obj->funcs->close(obj, file_priv);
+@@ -363,7 +366,7 @@ drm_gem_handle_create_tail(struct drm_fi
+       idr_preload(GFP_KERNEL);
+       spin_lock(&file_priv->table_lock);
+-      ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
++      ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT);
+       spin_unlock(&file_priv->table_lock);
+       idr_preload_end();
+@@ -384,6 +387,11 @@ drm_gem_handle_create_tail(struct drm_fi
+                       goto err_revoke;
+       }
++      /* mirrors drm_gem_handle_delete to avoid races */
++      spin_lock(&file_priv->table_lock);
++      obj = idr_replace(&file_priv->object_idr, obj, handle);
++      WARN_ON(obj != NULL);
++      spin_unlock(&file_priv->table_lock);
+       *handlep = handle;
+       return 0;
+--- a/include/drm/drm_file.h
++++ b/include/drm/drm_file.h
+@@ -273,6 +273,9 @@ struct drm_file {
+        *
+        * Mapping of mm object handles to object pointers. Used by the GEM
+        * subsystem. Protected by @table_lock.
++       *
++       * Note that allocated entries might be NULL as a transient state when
++       * creating or deleting a handle.
+        */
+       struct idr object_idr;
diff --git a/queue-6.1/drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch b/queue-6.1/drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch
new file mode 100644 (file)
index 0000000..36ea909
--- /dev/null
@@ -0,0 +1,50 @@
+From 8af39ec5cf2be522c8eb43a3d8005ed59e4daaee Mon Sep 17 00:00:00 2001
+From: Matthew Brost <matthew.brost@intel.com>
+Date: Fri, 13 Jun 2025 14:20:13 -0700
+Subject: drm/sched: Increment job count before swapping tail spsc queue
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Matthew Brost <matthew.brost@intel.com>
+
+commit 8af39ec5cf2be522c8eb43a3d8005ed59e4daaee upstream.
+
+A small race exists between spsc_queue_push and the run-job worker, in
+which spsc_queue_push may return not-first while the run-job worker has
+already idled due to the job count being zero. If this race occurs, job
+scheduling stops, leading to hangs while waiting on the job’s DMA
+fences.
+
+Seal this race by incrementing the job count before appending to the
+SPSC queue.
+
+This race was observed on a drm-tip 6.16-rc1 build with the Xe driver in
+an SVM test case.
+
+Fixes: 1b1f42d8fde4 ("drm: move amd_gpu_scheduler into common location")
+Fixes: 27105db6c63a ("drm/amdgpu: Add SPSC queue to scheduler.")
+Cc: stable@vger.kernel.org
+Signed-off-by: Matthew Brost <matthew.brost@intel.com>
+Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
+Link: https://lore.kernel.org/r/20250613212013.719312-1-matthew.brost@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/drm/spsc_queue.h |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/drm/spsc_queue.h
++++ b/include/drm/spsc_queue.h
+@@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struc
+       preempt_disable();
++      atomic_inc(&queue->job_count);
++      smp_mb__after_atomic();
++
+       tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next);
+       WRITE_ONCE(*tail, node);
+-      atomic_inc(&queue->job_count);
+       /*
+        * In case of first element verify new node will be visible to the consumer
diff --git a/queue-6.1/drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch b/queue-6.1/drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch
new file mode 100644 (file)
index 0000000..18f0375
--- /dev/null
@@ -0,0 +1,55 @@
+From 97e000acf2e20a86a50a0ec8c2739f0846f37509 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Fri, 13 Jun 2025 13:16:38 +0200
+Subject: drm/ttm: fix error handling in ttm_buffer_object_transfer
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian König <christian.koenig@amd.com>
+
+commit 97e000acf2e20a86a50a0ec8c2739f0846f37509 upstream.
+
+Unlocking the resv object was missing in the error path, additionally to
+that we should move over the resource only after the fence slot was
+reserved.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Matthew Brost <matthew.brost@intel.com>
+Fixes: c8d4c18bfbc4a ("dma-buf/drivers: make reserving a shared slot mandatory v4")
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20250616130726.22863-3-christian.koenig@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/ttm/ttm_bo_util.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
++++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
+@@ -244,6 +244,13 @@ static int ttm_buffer_object_transfer(st
+       ret = dma_resv_trylock(&fbo->base.base._resv);
+       WARN_ON(!ret);
++      ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
++      if (ret) {
++              dma_resv_unlock(&fbo->base.base._resv);
++              kfree(fbo);
++              return ret;
++      }
++
+       if (fbo->base.resource) {
+               ttm_resource_set_bo(fbo->base.resource, &fbo->base);
+               bo->resource = NULL;
+@@ -252,12 +259,6 @@ static int ttm_buffer_object_transfer(st
+               fbo->base.bulk_move = NULL;
+       }
+-      ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
+-      if (ret) {
+-              kfree(fbo);
+-              return ret;
+-      }
+-
+       ttm_bo_get(bo);
+       fbo->bo = bo;
index 1dec4567462f6eaadd2ec08e66952c2807676931..027b6ff8df90c33b0e8aa897d810768afc125305 100644 (file)
@@ -30,3 +30,6 @@ gre-fix-ipv6-multicast-route-creation.patch
 md-md-bitmap-fix-gpf-in-bitmap_get_stats.patch
 pinctrl-qcom-msm-mark-certain-pins-as-invalid-for-interrupts.patch
 wifi-prevent-a-msdu-attacks-in-mesh-networks.patch
+drm-sched-increment-job-count-before-swapping-tail-spsc-queue.patch
+drm-ttm-fix-error-handling-in-ttm_buffer_object_transfer.patch
+drm-gem-fix-race-in-drm_gem_handle_create_tail.patch