]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.12-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 4 Nov 2025 05:30:55 +0000 (14:30 +0900)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 4 Nov 2025 05:30:55 +0000 (14:30 +0900)
added patches:
drm-sched-fix-race-in-drm_sched_entity_select_rq.patch
drm-sched-optimise-drm_sched_entity_push_job.patch
drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch
s390-disable-arch_want_optimize_hugetlb_vmemmap.patch
s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch
usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch
x86-cpu-amd-add-rdseed-fix-for-zen5.patch

queue-6.12/drm-sched-fix-race-in-drm_sched_entity_select_rq.patch [new file with mode: 0644]
queue-6.12/drm-sched-optimise-drm_sched_entity_push_job.patch [new file with mode: 0644]
queue-6.12/drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch [new file with mode: 0644]
queue-6.12/s390-disable-arch_want_optimize_hugetlb_vmemmap.patch [new file with mode: 0644]
queue-6.12/s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch [new file with mode: 0644]
queue-6.12/series
queue-6.12/usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch [new file with mode: 0644]
queue-6.12/x86-cpu-amd-add-rdseed-fix-for-zen5.patch [new file with mode: 0644]

diff --git a/queue-6.12/drm-sched-fix-race-in-drm_sched_entity_select_rq.patch b/queue-6.12/drm-sched-fix-race-in-drm_sched_entity_select_rq.patch
new file mode 100644 (file)
index 0000000..01aecc8
--- /dev/null
@@ -0,0 +1,43 @@
+From stable+bounces-192197-greg=kroah.com@vger.kernel.org Mon Nov  3 21:47:10 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon,  3 Nov 2025 07:44:50 -0500
+Subject: drm/sched: Fix race in drm_sched_entity_select_rq()
+To: stable@vger.kernel.org
+Cc: Philipp Stanner <phasta@kernel.org>, Tvrtko Ursulin <tvrtko.ursulin@igalia.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251103124450.4002293-3-sashal@kernel.org>
+
+From: Philipp Stanner <phasta@kernel.org>
+
+[ Upstream commit d25e3a610bae03bffc5c14b5d944a5d0cd844678 ]
+
+In a past bug fix it was forgotten that entity access must be protected
+by the entity lock. That's a data race and potentially UB.
+
+Move the spin_unlock() to the appropriate position.
+
+Cc: stable@vger.kernel.org # v5.13+
+Fixes: ac4eb83ab255 ("drm/sched: select new rq even if there is only one v3")
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Signed-off-by: Philipp Stanner <phasta@kernel.org>
+Link: https://patch.msgid.link/20251022063402.87318-2-phasta@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
++++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -558,10 +558,11 @@ void drm_sched_entity_select_rq(struct d
+               drm_sched_rq_remove_entity(entity->rq, entity);
+               entity->rq = rq;
+       }
+-      spin_unlock(&entity->lock);
+       if (entity->num_sched_list == 1)
+               entity->sched_list = NULL;
++
++      spin_unlock(&entity->lock);
+ }
+ /**
diff --git a/queue-6.12/drm-sched-optimise-drm_sched_entity_push_job.patch b/queue-6.12/drm-sched-optimise-drm_sched_entity_push_job.patch
new file mode 100644 (file)
index 0000000..60ff340
--- /dev/null
@@ -0,0 +1,115 @@
+From stable+bounces-192195-greg=kroah.com@vger.kernel.org Mon Nov  3 21:51:05 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon,  3 Nov 2025 07:44:48 -0500
+Subject: drm/sched: Optimise drm_sched_entity_push_job
+To: stable@vger.kernel.org
+Cc: "Tvrtko Ursulin" <tvrtko.ursulin@igalia.com>, "Christian König" <christian.koenig@amd.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Luben Tuikov" <ltuikov89@gmail.com>, "Matthew Brost" <matthew.brost@intel.com>, "Philipp Stanner" <pstanner@redhat.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251103124450.4002293-1-sashal@kernel.org>
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+[ Upstream commit d42a254633c773921884a19e8a1a0f53a31150c3 ]
+
+In FIFO mode (which is the default), both drm_sched_entity_push_job() and
+drm_sched_rq_update_fifo(), where the latter calls the former, are
+currently taking and releasing the same entity->rq_lock.
+
+We can avoid that design inelegance, and also have a miniscule
+efficiency improvement on the submit from idle path, by introducing a new
+drm_sched_rq_update_fifo_locked() helper and pulling up the lock taking to
+its callers.
+
+v2:
+ * Remove drm_sched_rq_update_fifo() altogether. (Christian)
+
+v3:
+ * Improved commit message. (Philipp)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Luben Tuikov <ltuikov89@gmail.com>
+Cc: Matthew Brost <matthew.brost@intel.com>
+Cc: Philipp Stanner <pstanner@redhat.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Philipp Stanner <pstanner@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-2-tursulin@igalia.com
+Stable-dep-of: d25e3a610bae ("drm/sched: Fix race in drm_sched_entity_select_rq()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c |   13 +++++++++----
+ drivers/gpu/drm/scheduler/sched_main.c   |    6 +++---
+ include/drm/gpu_scheduler.h              |    2 +-
+ 3 files changed, 13 insertions(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
++++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -506,8 +506,12 @@ struct drm_sched_job *drm_sched_entity_p
+               struct drm_sched_job *next;
+               next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
+-              if (next)
+-                      drm_sched_rq_update_fifo(entity, next->submit_ts);
++              if (next) {
++                      spin_lock(&entity->rq_lock);
++                      drm_sched_rq_update_fifo_locked(entity,
++                                                      next->submit_ts);
++                      spin_unlock(&entity->rq_lock);
++              }
+       }
+       /* Jobs and entities might have different lifecycles. Since we're
+@@ -607,10 +611,11 @@ void drm_sched_entity_push_job(struct dr
+               sched = rq->sched;
+               drm_sched_rq_add_entity(rq, entity);
+-              spin_unlock(&entity->rq_lock);
+               if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
+-                      drm_sched_rq_update_fifo(entity, submit_ts);
++                      drm_sched_rq_update_fifo_locked(entity, submit_ts);
++
++              spin_unlock(&entity->rq_lock);
+               drm_sched_wakeup(sched);
+       }
+--- a/drivers/gpu/drm/scheduler/sched_main.c
++++ b/drivers/gpu/drm/scheduler/sched_main.c
+@@ -169,14 +169,15 @@ static inline void drm_sched_rq_remove_f
+       }
+ }
+-void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts)
++void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts)
+ {
+       /*
+        * Both locks need to be grabbed, one to protect from entity->rq change
+        * for entity from within concurrent drm_sched_entity_select_rq and the
+        * other to update the rb tree structure.
+        */
+-      spin_lock(&entity->rq_lock);
++      lockdep_assert_held(&entity->rq_lock);
++
+       spin_lock(&entity->rq->lock);
+       drm_sched_rq_remove_fifo_locked(entity);
+@@ -187,7 +188,6 @@ void drm_sched_rq_update_fifo(struct drm
+                     drm_sched_entity_compare_before);
+       spin_unlock(&entity->rq->lock);
+-      spin_unlock(&entity->rq_lock);
+ }
+ /**
+--- a/include/drm/gpu_scheduler.h
++++ b/include/drm/gpu_scheduler.h
+@@ -593,7 +593,7 @@ void drm_sched_rq_add_entity(struct drm_
+ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
+                               struct drm_sched_entity *entity);
+-void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts);
++void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts);
+ int drm_sched_entity_init(struct drm_sched_entity *entity,
+                         enum drm_sched_priority priority,
diff --git a/queue-6.12/drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch b/queue-6.12/drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch
new file mode 100644 (file)
index 0000000..4ac4710
--- /dev/null
@@ -0,0 +1,203 @@
+From stable+bounces-192196-greg=kroah.com@vger.kernel.org Mon Nov  3 21:47:09 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon,  3 Nov 2025 07:44:49 -0500
+Subject: drm/sched: Re-group and rename the entity run-queue lock
+To: stable@vger.kernel.org
+Cc: "Tvrtko Ursulin" <tvrtko.ursulin@igalia.com>, "Christian König" <christian.koenig@amd.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Luben Tuikov" <ltuikov89@gmail.com>, "Matthew Brost" <matthew.brost@intel.com>, "Philipp Stanner" <pstanner@redhat.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251103124450.4002293-2-sashal@kernel.org>
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+[ Upstream commit f93126f5d55920d1447ef00a3fbe6706f40f53de ]
+
+When writing to a drm_sched_entity's run-queue, writers are protected
+through the lock drm_sched_entity.rq_lock. This naming, however,
+frequently collides with the separate internal lock of struct
+drm_sched_rq, resulting in uses like this:
+
+       spin_lock(&entity->rq_lock);
+       spin_lock(&entity->rq->lock);
+
+Rename drm_sched_entity.rq_lock to improve readability. While at it,
+re-order that struct's members to make it more obvious what the lock
+protects.
+
+v2:
+ * Rename some rq_lock straddlers in kerneldoc, improve commit text. (Philipp)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Suggested-by: Christian König <christian.koenig@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Luben Tuikov <ltuikov89@gmail.com>
+Cc: Matthew Brost <matthew.brost@intel.com>
+Cc: Philipp Stanner <pstanner@redhat.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+[pstanner: Fix typo in docstring]
+Signed-off-by: Philipp Stanner <pstanner@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-5-tursulin@igalia.com
+Stable-dep-of: d25e3a610bae ("drm/sched: Fix race in drm_sched_entity_select_rq()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c |   28 ++++++++++++++--------------
+ drivers/gpu/drm/scheduler/sched_main.c   |    2 +-
+ include/drm/gpu_scheduler.h              |   21 +++++++++++----------
+ 3 files changed, 26 insertions(+), 25 deletions(-)
+
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
++++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -106,7 +106,7 @@ int drm_sched_entity_init(struct drm_sch
+       /* We start in an idle state. */
+       complete_all(&entity->entity_idle);
+-      spin_lock_init(&entity->rq_lock);
++      spin_lock_init(&entity->lock);
+       spsc_queue_init(&entity->job_queue);
+       atomic_set(&entity->fence_seq, 0);
+@@ -134,10 +134,10 @@ void drm_sched_entity_modify_sched(struc
+ {
+       WARN_ON(!num_sched_list || !sched_list);
+-      spin_lock(&entity->rq_lock);
++      spin_lock(&entity->lock);
+       entity->sched_list = sched_list;
+       entity->num_sched_list = num_sched_list;
+-      spin_unlock(&entity->rq_lock);
++      spin_unlock(&entity->lock);
+ }
+ EXPORT_SYMBOL(drm_sched_entity_modify_sched);
+@@ -246,10 +246,10 @@ static void drm_sched_entity_kill(struct
+       if (!entity->rq)
+               return;
+-      spin_lock(&entity->rq_lock);
++      spin_lock(&entity->lock);
+       entity->stopped = true;
+       drm_sched_rq_remove_entity(entity->rq, entity);
+-      spin_unlock(&entity->rq_lock);
++      spin_unlock(&entity->lock);
+       /* Make sure this entity is not used by the scheduler at the moment */
+       wait_for_completion(&entity->entity_idle);
+@@ -395,9 +395,9 @@ static void drm_sched_entity_wakeup(stru
+ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
+                                  enum drm_sched_priority priority)
+ {
+-      spin_lock(&entity->rq_lock);
++      spin_lock(&entity->lock);
+       entity->priority = priority;
+-      spin_unlock(&entity->rq_lock);
++      spin_unlock(&entity->lock);
+ }
+ EXPORT_SYMBOL(drm_sched_entity_set_priority);
+@@ -507,10 +507,10 @@ struct drm_sched_job *drm_sched_entity_p
+               next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
+               if (next) {
+-                      spin_lock(&entity->rq_lock);
++                      spin_lock(&entity->lock);
+                       drm_sched_rq_update_fifo_locked(entity,
+                                                       next->submit_ts);
+-                      spin_unlock(&entity->rq_lock);
++                      spin_unlock(&entity->lock);
+               }
+       }
+@@ -551,14 +551,14 @@ void drm_sched_entity_select_rq(struct d
+       if (fence && !dma_fence_is_signaled(fence))
+               return;
+-      spin_lock(&entity->rq_lock);
++      spin_lock(&entity->lock);
+       sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list);
+       rq = sched ? sched->sched_rq[entity->priority] : NULL;
+       if (rq != entity->rq) {
+               drm_sched_rq_remove_entity(entity->rq, entity);
+               entity->rq = rq;
+       }
+-      spin_unlock(&entity->rq_lock);
++      spin_unlock(&entity->lock);
+       if (entity->num_sched_list == 1)
+               entity->sched_list = NULL;
+@@ -599,9 +599,9 @@ void drm_sched_entity_push_job(struct dr
+               struct drm_sched_rq *rq;
+               /* Add the entity to the run queue */
+-              spin_lock(&entity->rq_lock);
++              spin_lock(&entity->lock);
+               if (entity->stopped) {
+-                      spin_unlock(&entity->rq_lock);
++                      spin_unlock(&entity->lock);
+                       DRM_ERROR("Trying to push to a killed entity\n");
+                       return;
+@@ -615,7 +615,7 @@ void drm_sched_entity_push_job(struct dr
+               if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
+                       drm_sched_rq_update_fifo_locked(entity, submit_ts);
+-              spin_unlock(&entity->rq_lock);
++              spin_unlock(&entity->lock);
+               drm_sched_wakeup(sched);
+       }
+--- a/drivers/gpu/drm/scheduler/sched_main.c
++++ b/drivers/gpu/drm/scheduler/sched_main.c
+@@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(str
+        * for entity from within concurrent drm_sched_entity_select_rq and the
+        * other to update the rb tree structure.
+        */
+-      lockdep_assert_held(&entity->rq_lock);
++      lockdep_assert_held(&entity->lock);
+       spin_lock(&entity->rq->lock);
+--- a/include/drm/gpu_scheduler.h
++++ b/include/drm/gpu_scheduler.h
+@@ -97,13 +97,21 @@ struct drm_sched_entity {
+       struct list_head                list;
+       /**
++       * @lock:
++       *
++       * Lock protecting the run-queue (@rq) to which this entity belongs,
++       * @priority and the list of schedulers (@sched_list, @num_sched_list).
++       */
++      spinlock_t                      lock;
++
++      /**
+        * @rq:
+        *
+        * Runqueue on which this entity is currently scheduled.
+        *
+        * FIXME: Locking is very unclear for this. Writers are protected by
+-       * @rq_lock, but readers are generally lockless and seem to just race
+-       * with not even a READ_ONCE.
++       * @lock, but readers are generally lockless and seem to just race with
++       * not even a READ_ONCE.
+        */
+       struct drm_sched_rq             *rq;
+@@ -136,18 +144,11 @@ struct drm_sched_entity {
+        * @priority:
+        *
+        * Priority of the entity. This can be modified by calling
+-       * drm_sched_entity_set_priority(). Protected by &rq_lock.
++       * drm_sched_entity_set_priority(). Protected by @lock.
+        */
+       enum drm_sched_priority         priority;
+       /**
+-       * @rq_lock:
+-       *
+-       * Lock to modify the runqueue to which this entity belongs.
+-       */
+-      spinlock_t                      rq_lock;
+-
+-      /**
+        * @job_queue: the list of jobs of this entity.
+        */
+       struct spsc_queue               job_queue;
diff --git a/queue-6.12/s390-disable-arch_want_optimize_hugetlb_vmemmap.patch b/queue-6.12/s390-disable-arch_want_optimize_hugetlb_vmemmap.patch
new file mode 100644 (file)
index 0000000..7bece7f
--- /dev/null
@@ -0,0 +1,57 @@
+From stable+bounces-192140-greg=kroah.com@vger.kernel.org Mon Nov  3 11:24:25 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun,  2 Nov 2025 21:24:16 -0500
+Subject: s390: Disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+To: stable@vger.kernel.org
+Cc: Heiko Carstens <hca@linux.ibm.com>, Luiz Capitulino <luizcap@redhat.com>, Gerald Schaefer <gerald.schaefer@linux.ibm.com>, David Hildenbrand <david@redhat.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251103022416.3808769-1-sashal@kernel.org>
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+[ Upstream commit 64e2f60f355e556337fcffe80b9bcff1b22c9c42 ]
+
+As reported by Luiz Capitulino enabling HVO on s390 leads to reproducible
+crashes. The problem is that kernel page tables are modified without
+flushing corresponding TLB entries.
+
+Even if it looks like the empty flush_tlb_all() implementation on s390 is
+the problem, it is actually a different problem: on s390 it is not allowed
+to replace an active/valid page table entry with another valid page table
+entry without the detour over an invalid entry. A direct replacement may
+lead to random crashes and/or data corruption.
+
+In order to invalidate an entry special instructions have to be used
+(e.g. ipte or idte). Alternatively there are also special instructions
+available which allow to replace a valid entry with a different valid
+entry (e.g. crdte or cspg).
+
+Given that the HVO code currently does not provide the hooks to allow for
+an implementation which is compliant with the s390 architecture
+requirements, disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP again, which is
+basically a revert of the original patch which enabled it.
+
+Reported-by: Luiz Capitulino <luizcap@redhat.com>
+Closes: https://lore.kernel.org/all/20251028153930.37107-1-luizcap@redhat.com/
+Fixes: 00a34d5a99c0 ("s390: select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP")
+Cc: stable@vger.kernel.org
+Tested-by: Luiz Capitulino <luizcap@redhat.com>
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/Kconfig |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -135,7 +135,6 @@ config S390
+       select ARCH_WANT_IPC_PARSE_VERSION
+       select ARCH_WANT_KERNEL_PMD_MKWRITE
+       select ARCH_WANT_LD_ORPHAN_WARN
+-      select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+       select BUILDTIME_TABLE_SORT
+       select CLONE_BACKWARDS2
+       select DCACHE_WORD_ACCESS if !KMSAN
diff --git a/queue-6.12/s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch b/queue-6.12/s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch
new file mode 100644 (file)
index 0000000..fa25727
--- /dev/null
@@ -0,0 +1,120 @@
+From stable+bounces-192141-greg=kroah.com@vger.kernel.org Mon Nov  3 11:24:26 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun,  2 Nov 2025 21:24:19 -0500
+Subject: s390/pci: Avoid deadlock between PCI error recovery and mlx5 crdump
+To: stable@vger.kernel.org
+Cc: Gerd Bayer <gbayer@linux.ibm.com>, Niklas Schnelle <schnelle@linux.ibm.com>, Heiko Carstens <hca@linux.ibm.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251103022419.3808824-1-sashal@kernel.org>
+
+From: Gerd Bayer <gbayer@linux.ibm.com>
+
+[ Upstream commit 0fd20f65df6aa430454a0deed8f43efa91c54835 ]
+
+Do not block PCI config accesses through pci_cfg_access_lock() when
+executing the s390 variant of PCI error recovery: Acquire just
+device_lock() instead of pci_dev_lock() as powerpc's EEH and
+generic PCI AER processing do.
+
+During error recovery testing a pair of tasks was reported to be hung:
+
+mlx5_core 0000:00:00.1: mlx5_health_try_recover:338:(pid 5553): health recovery flow aborted, PCI reads still not working
+INFO: task kmcheck:72 blocked for more than 122 seconds.
+      Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+task:kmcheck         state:D stack:0     pid:72    tgid:72    ppid:2      flags:0x00000000
+Call Trace:
+ [<000000065256f030>] __schedule+0x2a0/0x590
+ [<000000065256f356>] schedule+0x36/0xe0
+ [<000000065256f572>] schedule_preempt_disabled+0x22/0x30
+ [<0000000652570a94>] __mutex_lock.constprop.0+0x484/0x8a8
+ [<000003ff800673a4>] mlx5_unload_one+0x34/0x58 [mlx5_core]
+ [<000003ff8006745c>] mlx5_pci_err_detected+0x94/0x140 [mlx5_core]
+ [<0000000652556c5a>] zpci_event_attempt_error_recovery+0xf2/0x398
+ [<0000000651b9184a>] __zpci_event_error+0x23a/0x2c0
+INFO: task kworker/u1664:6:1514 blocked for more than 122 seconds.
+      Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+task:kworker/u1664:6 state:D stack:0     pid:1514  tgid:1514  ppid:2      flags:0x00000000
+Workqueue: mlx5_health0000:00:00.0 mlx5_fw_fatal_reporter_err_work [mlx5_core]
+Call Trace:
+ [<000000065256f030>] __schedule+0x2a0/0x590
+ [<000000065256f356>] schedule+0x36/0xe0
+ [<0000000652172e28>] pci_wait_cfg+0x80/0xe8
+ [<0000000652172f94>] pci_cfg_access_lock+0x74/0x88
+ [<000003ff800916b6>] mlx5_vsc_gw_lock+0x36/0x178 [mlx5_core]
+ [<000003ff80098824>] mlx5_crdump_collect+0x34/0x1c8 [mlx5_core]
+ [<000003ff80074b62>] mlx5_fw_fatal_reporter_dump+0x6a/0xe8 [mlx5_core]
+ [<0000000652512242>] devlink_health_do_dump.part.0+0x82/0x168
+ [<0000000652513212>] devlink_health_report+0x19a/0x230
+ [<000003ff80075a12>] mlx5_fw_fatal_reporter_err_work+0xba/0x1b0 [mlx5_core]
+
+No kernel log of the exact same error with an upstream kernel is
+available - but the very same deadlock situation can be constructed there,
+too:
+
+- task: kmcheck
+  mlx5_unload_one() tries to acquire devlink lock while the PCI error
+  recovery code has set pdev->block_cfg_access by way of
+  pci_cfg_access_lock()
+- task: kworker
+  mlx5_crdump_collect() tries to set block_cfg_access through
+  pci_cfg_access_lock() while devlink_health_report() had acquired
+  the devlink lock.
+
+A similar deadlock situation can be reproduced by requesting a
+crdump with
+  > devlink health dump show pci/<BDF> reporter fw_fatal
+
+while PCI error recovery is executed on the same <BDF> physical function
+by mlx5_core's pci_error_handlers. On s390 this can be injected with
+  > zpcictl --reset-fw <BDF>
+
+Tests with this patch failed to reproduce that second deadlock situation,
+the devlink command is rejected with "kernel answers: Permission denied" -
+and we get a kernel log message of:
+
+mlx5_core 1ed0:00:00.1: mlx5_crdump_collect:50:(pid 254382): crdump: failed to lock vsc gw err -5
+
+because the config read of VSC_SEMAPHORE is rejected by the underlying
+hardware.
+
+Two prior attempts to address this issue have been discussed and
+ultimately rejected [see link], with the primary argument that s390's
+implementation of PCI error recovery is imposing restrictions that
+neither powerpc's EEH nor PCI AER handling need. Tests show that PCI
+error recovery on s390 is running to completion even without blocking
+access to PCI config space.
+
+Link: https://lore.kernel.org/all/20251007144826.2825134-1-gbayer@linux.ibm.com/
+Cc: stable@vger.kernel.org
+Fixes: 4cdf2f4e24ff ("s390/pci: implement minimal PCI error recovery")
+Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/pci/pci_event.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/pci/pci_event.c
++++ b/arch/s390/pci/pci_event.c
+@@ -180,7 +180,7 @@ static pci_ers_result_t zpci_event_attem
+        * is unbound or probed and that userspace can't access its
+        * configuration space while we perform recovery.
+        */
+-      pci_dev_lock(pdev);
++      device_lock(&pdev->dev);
+       if (pdev->error_state == pci_channel_io_perm_failure) {
+               ers_res = PCI_ERS_RESULT_DISCONNECT;
+               goto out_unlock;
+@@ -228,7 +228,7 @@ static pci_ers_result_t zpci_event_attem
+       if (driver->err_handler->resume)
+               driver->err_handler->resume(pdev);
+ out_unlock:
+-      pci_dev_unlock(pdev);
++      device_unlock(&pdev->dev);
+       return ers_res;
+ }
index 5d8c9f8a315bfe48bb40c901b28e05ae92a8a1f1..d4200bcc83b551342f55e18d89f670b616bbbb1c 100644 (file)
@@ -78,3 +78,10 @@ sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch
 cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch
 cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch
 mfd-kempld-switch-back-to-earlier-init-behavior.patch
+x86-cpu-amd-add-rdseed-fix-for-zen5.patch
+usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch
+drm-sched-optimise-drm_sched_entity_push_job.patch
+drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch
+drm-sched-fix-race-in-drm_sched_entity_select_rq.patch
+s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch
+s390-disable-arch_want_optimize_hugetlb_vmemmap.patch
diff --git a/queue-6.12/usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch b/queue-6.12/usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch
new file mode 100644 (file)
index 0000000..fdd8072
--- /dev/null
@@ -0,0 +1,54 @@
+From cfd6f1a7b42f62523c96d9703ef32b0dbc495ba4 Mon Sep 17 00:00:00 2001
+From: Owen Gu <guhuinan@xiaomi.com>
+Date: Mon, 15 Sep 2025 17:29:07 +0800
+Subject: usb: gadget: f_fs: Fix epfile null pointer access after ep enable.
+
+From: Owen Gu <guhuinan@xiaomi.com>
+
+commit cfd6f1a7b42f62523c96d9703ef32b0dbc495ba4 upstream.
+
+A race condition occurs when ffs_func_eps_enable() runs concurrently
+with ffs_data_reset(). The ffs_data_clear() called in ffs_data_reset()
+sets ffs->epfiles to NULL before resetting ffs->eps_count to 0, leading
+to a NULL pointer dereference when accessing epfile->ep in
+ffs_func_eps_enable() after successful usb_ep_enable().
+
+The ffs->epfiles pointer is set to NULL in both ffs_data_clear() and
+ffs_data_close() functions, and its modification is protected by the
+spinlock ffs->eps_lock. And the whole ffs_func_eps_enable() function
+is also protected by ffs->eps_lock.
+
+Thus, add NULL pointer handling for ffs->epfiles in the
+ffs_func_eps_enable() function to fix this issue.
+
+Signed-off-by: Owen Gu <guhuinan@xiaomi.com>
+Link: https://lore.kernel.org/r/20250915092907.17802-1-guhuinan@xiaomi.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/gadget/function/f_fs.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/usb/gadget/function/f_fs.c
++++ b/drivers/usb/gadget/function/f_fs.c
+@@ -2418,7 +2418,12 @@ static int ffs_func_eps_enable(struct ff
+       ep = func->eps;
+       epfile = ffs->epfiles;
+       count = ffs->eps_count;
+-      while(count--) {
++      if (!epfile) {
++              ret = -ENOMEM;
++              goto done;
++      }
++
++      while (count--) {
+               ep->ep->driver_data = ep;
+               ret = config_ep_by_speed(func->gadget, &func->function, ep->ep);
+@@ -2442,6 +2447,7 @@ static int ffs_func_eps_enable(struct ff
+       }
+       wake_up_interruptible(&ffs->wait);
++done:
+       spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
+       return ret;
diff --git a/queue-6.12/x86-cpu-amd-add-rdseed-fix-for-zen5.patch b/queue-6.12/x86-cpu-amd-add-rdseed-fix-for-zen5.patch
new file mode 100644 (file)
index 0000000..84bbf1f
--- /dev/null
@@ -0,0 +1,75 @@
+From 607b9fb2ce248cc5b633c5949e0153838992c152 Mon Sep 17 00:00:00 2001
+From: Gregory Price <gourry@gourry.net>
+Date: Mon, 20 Oct 2025 11:13:55 +0200
+Subject: x86/CPU/AMD: Add RDSEED fix for Zen5
+
+From: Gregory Price <gourry@gourry.net>
+
+commit 607b9fb2ce248cc5b633c5949e0153838992c152 upstream.
+
+There's an issue with RDSEED's 16-bit and 32-bit register output
+variants on Zen5 which return a random value of 0 "at a rate inconsistent
+with randomness while incorrectly signaling success (CF=1)". Search the
+web for AMD-SB-7055 for more detail.
+
+Add a fix glue which checks microcode revisions.
+
+  [ bp: Add microcode revisions checking, rewrite. ]
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Gregory Price <gourry@gourry.net>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20251018024010.4112396-1-gourry@gourry.net
+[ bp: 6.12 backport: use the alternative microcode version checking. ]
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/amd.c |   35 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -1018,8 +1018,43 @@ static void init_amd_zen4(struct cpuinfo
+       }
+ }
++static bool check_rdseed_microcode(void)
++{
++      struct cpuinfo_x86 *c = &boot_cpu_data;
++      union zen_patch_rev p;
++      u32 min_rev = 0;
++
++      p.ext_fam       = c->x86 - 0xf;
++      p.model         = c->x86_model;
++      p.ext_model     = c->x86_model >> 4;
++      p.stepping      = c->x86_stepping;
++      /* reserved bits are expected to be 0 in test below */
++      p.__reserved    = 0;
++
++      if (cpu_has(c, X86_FEATURE_ZEN5)) {
++              switch (p.ucode_rev >> 8) {
++              case 0xb0021:   min_rev = 0xb00215a; break;
++              case 0xb1010:   min_rev = 0xb101054; break;
++              default:
++                      pr_debug("%s: ucode_rev: 0x%x, current revision: 0x%x\n",
++                               __func__, p.ucode_rev, c->microcode);
++                      return false;
++              }
++      }
++
++      if (!min_rev)
++              return false;
++
++      return c->microcode >= min_rev;
++}
++
+ static void init_amd_zen5(struct cpuinfo_x86 *c)
+ {
++      if (!check_rdseed_microcode()) {
++              clear_cpu_cap(c, X86_FEATURE_RDSEED);
++              msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18);
++              pr_emerg_once("RDSEED32 is broken. Disabling the corresponding CPUID bit.\n");
++      }
+ }
+ static void init_amd(struct cpuinfo_x86 *c)