From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 4 Nov 2025 05:30:55 +0000 (+0900)
Subject: 6.12-stable patches
X-Git-Tag: v6.12.58~30
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a45420df6b86929e1029eab70190e3c90e488de3;p=thirdparty%2Fkernel%2Fstable-queue.git

6.12-stable patches

added patches:
	drm-sched-fix-race-in-drm_sched_entity_select_rq.patch
	drm-sched-optimise-drm_sched_entity_push_job.patch
	drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch
	s390-disable-arch_want_optimize_hugetlb_vmemmap.patch
	s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch
	usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch
	x86-cpu-amd-add-rdseed-fix-for-zen5.patch
---

diff --git a/queue-6.12/drm-sched-fix-race-in-drm_sched_entity_select_rq.patch b/queue-6.12/drm-sched-fix-race-in-drm_sched_entity_select_rq.patch
new file mode 100644
index 0000000000..01aecc864b
--- /dev/null
+++ b/queue-6.12/drm-sched-fix-race-in-drm_sched_entity_select_rq.patch
@@ -0,0 +1,43 @@
+From stable+bounces-192197-greg=kroah.com@vger.kernel.org Mon Nov  3 21:47:10 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon,  3 Nov 2025 07:44:50 -0500
+Subject: drm/sched: Fix race in drm_sched_entity_select_rq()
+To: stable@vger.kernel.org
+Cc: Philipp Stanner <phasta@kernel.org>, Tvrtko Ursulin <tvrtko.ursulin@igalia.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251103124450.4002293-3-sashal@kernel.org>
+
+From: Philipp Stanner <phasta@kernel.org>
+
+[ Upstream commit d25e3a610bae03bffc5c14b5d944a5d0cd844678 ]
+
+In a past bug fix it was forgotten that entity access must be protected
+by the entity lock. That's a data race and potentially UB.
+
+Move the spin_unlock() to the appropriate position.
+
+Cc: stable@vger.kernel.org # v5.13+
+Fixes: ac4eb83ab255 ("drm/sched: select new rq even if there is only one v3")
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Signed-off-by: Philipp Stanner <phasta@kernel.org>
+Link: https://patch.msgid.link/20251022063402.87318-2-phasta@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
++++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -558,10 +558,11 @@ void drm_sched_entity_select_rq(struct d
+ 		drm_sched_rq_remove_entity(entity->rq, entity);
+ 		entity->rq = rq;
+ 	}
+-	spin_unlock(&entity->lock);
+ 
+ 	if (entity->num_sched_list == 1)
+ 		entity->sched_list = NULL;
++
++	spin_unlock(&entity->lock);
+ }
+ 
+ /**
diff --git a/queue-6.12/drm-sched-optimise-drm_sched_entity_push_job.patch b/queue-6.12/drm-sched-optimise-drm_sched_entity_push_job.patch
new file mode 100644
index 0000000000..60ff340ec5
--- /dev/null
+++ b/queue-6.12/drm-sched-optimise-drm_sched_entity_push_job.patch
@@ -0,0 +1,115 @@
+From stable+bounces-192195-greg=kroah.com@vger.kernel.org Mon Nov  3 21:51:05 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon,  3 Nov 2025 07:44:48 -0500
+Subject: drm/sched: Optimise drm_sched_entity_push_job
+To: stable@vger.kernel.org
+Cc: "Tvrtko Ursulin" <tvrtko.ursulin@igalia.com>, "Christian König" <christian.koenig@amd.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Luben Tuikov" <ltuikov89@gmail.com>, "Matthew Brost" <matthew.brost@intel.com>, "Philipp Stanner" <pstanner@redhat.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251103124450.4002293-1-sashal@kernel.org>
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+[ Upstream commit d42a254633c773921884a19e8a1a0f53a31150c3 ]
+
+In FIFO mode (which is the default), both drm_sched_entity_push_job() and
+drm_sched_rq_update_fifo(), where the former calls the latter, are
+currently taking and releasing the same entity->rq_lock.
+
+We can avoid that design inelegance, and also have a minuscule
+efficiency improvement on the submit from idle path, by introducing a new
+drm_sched_rq_update_fifo_locked() helper and pulling up the lock taking to
+its callers.
+
+v2:
+ * Remove drm_sched_rq_update_fifo() altogether. (Christian)
+
+v3:
+ * Improved commit message. (Philipp)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Luben Tuikov <ltuikov89@gmail.com>
+Cc: Matthew Brost <matthew.brost@intel.com>
+Cc: Philipp Stanner <pstanner@redhat.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Philipp Stanner <pstanner@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-2-tursulin@igalia.com
+Stable-dep-of: d25e3a610bae ("drm/sched: Fix race in drm_sched_entity_select_rq()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c |   13 +++++++++----
+ drivers/gpu/drm/scheduler/sched_main.c   |    6 +++---
+ include/drm/gpu_scheduler.h              |    2 +-
+ 3 files changed, 13 insertions(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
++++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -506,8 +506,12 @@ struct drm_sched_job *drm_sched_entity_p
+ 		struct drm_sched_job *next;
+ 
+ 		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
+-		if (next)
+-			drm_sched_rq_update_fifo(entity, next->submit_ts);
++		if (next) {
++			spin_lock(&entity->rq_lock);
++			drm_sched_rq_update_fifo_locked(entity,
++							next->submit_ts);
++			spin_unlock(&entity->rq_lock);
++		}
+ 	}
+ 
+ 	/* Jobs and entities might have different lifecycles. Since we're
+@@ -607,10 +611,11 @@ void drm_sched_entity_push_job(struct dr
+ 		sched = rq->sched;
+ 
+ 		drm_sched_rq_add_entity(rq, entity);
+-		spin_unlock(&entity->rq_lock);
+ 
+ 		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
+-			drm_sched_rq_update_fifo(entity, submit_ts);
++			drm_sched_rq_update_fifo_locked(entity, submit_ts);
++
++		spin_unlock(&entity->rq_lock);
+ 
+ 		drm_sched_wakeup(sched);
+ 	}
+--- a/drivers/gpu/drm/scheduler/sched_main.c
++++ b/drivers/gpu/drm/scheduler/sched_main.c
+@@ -169,14 +169,15 @@ static inline void drm_sched_rq_remove_f
+ 	}
+ }
+ 
+-void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts)
++void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts)
+ {
+ 	/*
+ 	 * Both locks need to be grabbed, one to protect from entity->rq change
+ 	 * for entity from within concurrent drm_sched_entity_select_rq and the
+ 	 * other to update the rb tree structure.
+ 	 */
+-	spin_lock(&entity->rq_lock);
++	lockdep_assert_held(&entity->rq_lock);
++
+ 	spin_lock(&entity->rq->lock);
+ 
+ 	drm_sched_rq_remove_fifo_locked(entity);
+@@ -187,7 +188,6 @@ void drm_sched_rq_update_fifo(struct drm
+ 		      drm_sched_entity_compare_before);
+ 
+ 	spin_unlock(&entity->rq->lock);
+-	spin_unlock(&entity->rq_lock);
+ }
+ 
+ /**
+--- a/include/drm/gpu_scheduler.h
++++ b/include/drm/gpu_scheduler.h
+@@ -593,7 +593,7 @@ void drm_sched_rq_add_entity(struct drm_
+ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
+ 				struct drm_sched_entity *entity);
+ 
+-void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts);
++void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts);
+ 
+ int drm_sched_entity_init(struct drm_sched_entity *entity,
+ 			  enum drm_sched_priority priority,
diff --git a/queue-6.12/drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch b/queue-6.12/drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch
new file mode 100644
index 0000000000..4ac4710ab2
--- /dev/null
+++ b/queue-6.12/drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch
@@ -0,0 +1,203 @@
+From stable+bounces-192196-greg=kroah.com@vger.kernel.org Mon Nov  3 21:47:09 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon,  3 Nov 2025 07:44:49 -0500
+Subject: drm/sched: Re-group and rename the entity run-queue lock
+To: stable@vger.kernel.org
+Cc: "Tvrtko Ursulin" <tvrtko.ursulin@igalia.com>, "Christian König" <christian.koenig@amd.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Luben Tuikov" <ltuikov89@gmail.com>, "Matthew Brost" <matthew.brost@intel.com>, "Philipp Stanner" <pstanner@redhat.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251103124450.4002293-2-sashal@kernel.org>
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+[ Upstream commit f93126f5d55920d1447ef00a3fbe6706f40f53de ]
+
+When writing to a drm_sched_entity's run-queue, writers are protected
+through the lock drm_sched_entity.rq_lock. This naming, however,
+frequently collides with the separate internal lock of struct
+drm_sched_rq, resulting in uses like this:
+
+	spin_lock(&entity->rq_lock);
+	spin_lock(&entity->rq->lock);
+
+Rename drm_sched_entity.rq_lock to improve readability. While at it,
+re-order that struct's members to make it more obvious what the lock
+protects.
+
+v2:
+ * Rename some rq_lock straddlers in kerneldoc, improve commit text. (Philipp)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Suggested-by: Christian König <christian.koenig@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Luben Tuikov <ltuikov89@gmail.com>
+Cc: Matthew Brost <matthew.brost@intel.com>
+Cc: Philipp Stanner <pstanner@redhat.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+[pstanner: Fix typo in docstring]
+Signed-off-by: Philipp Stanner <pstanner@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-5-tursulin@igalia.com
+Stable-dep-of: d25e3a610bae ("drm/sched: Fix race in drm_sched_entity_select_rq()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c |   28 ++++++++++++++--------------
+ drivers/gpu/drm/scheduler/sched_main.c   |    2 +-
+ include/drm/gpu_scheduler.h              |   21 +++++++++++----------
+ 3 files changed, 26 insertions(+), 25 deletions(-)
+
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
++++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -106,7 +106,7 @@ int drm_sched_entity_init(struct drm_sch
+ 	/* We start in an idle state. */
+ 	complete_all(&entity->entity_idle);
+ 
+-	spin_lock_init(&entity->rq_lock);
++	spin_lock_init(&entity->lock);
+ 	spsc_queue_init(&entity->job_queue);
+ 
+ 	atomic_set(&entity->fence_seq, 0);
+@@ -134,10 +134,10 @@ void drm_sched_entity_modify_sched(struc
+ {
+ 	WARN_ON(!num_sched_list || !sched_list);
+ 
+-	spin_lock(&entity->rq_lock);
++	spin_lock(&entity->lock);
+ 	entity->sched_list = sched_list;
+ 	entity->num_sched_list = num_sched_list;
+-	spin_unlock(&entity->rq_lock);
++	spin_unlock(&entity->lock);
+ }
+ EXPORT_SYMBOL(drm_sched_entity_modify_sched);
+ 
+@@ -246,10 +246,10 @@ static void drm_sched_entity_kill(struct
+ 	if (!entity->rq)
+ 		return;
+ 
+-	spin_lock(&entity->rq_lock);
++	spin_lock(&entity->lock);
+ 	entity->stopped = true;
+ 	drm_sched_rq_remove_entity(entity->rq, entity);
+-	spin_unlock(&entity->rq_lock);
++	spin_unlock(&entity->lock);
+ 
+ 	/* Make sure this entity is not used by the scheduler at the moment */
+ 	wait_for_completion(&entity->entity_idle);
+@@ -395,9 +395,9 @@ static void drm_sched_entity_wakeup(stru
+ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
+ 				   enum drm_sched_priority priority)
+ {
+-	spin_lock(&entity->rq_lock);
++	spin_lock(&entity->lock);
+ 	entity->priority = priority;
+-	spin_unlock(&entity->rq_lock);
++	spin_unlock(&entity->lock);
+ }
+ EXPORT_SYMBOL(drm_sched_entity_set_priority);
+ 
+@@ -507,10 +507,10 @@ struct drm_sched_job *drm_sched_entity_p
+ 
+ 		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
+ 		if (next) {
+-			spin_lock(&entity->rq_lock);
++			spin_lock(&entity->lock);
+ 			drm_sched_rq_update_fifo_locked(entity,
+ 							next->submit_ts);
+-			spin_unlock(&entity->rq_lock);
++			spin_unlock(&entity->lock);
+ 		}
+ 	}
+ 
+@@ -551,14 +551,14 @@ void drm_sched_entity_select_rq(struct d
+ 	if (fence && !dma_fence_is_signaled(fence))
+ 		return;
+ 
+-	spin_lock(&entity->rq_lock);
++	spin_lock(&entity->lock);
+ 	sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list);
+ 	rq = sched ? sched->sched_rq[entity->priority] : NULL;
+ 	if (rq != entity->rq) {
+ 		drm_sched_rq_remove_entity(entity->rq, entity);
+ 		entity->rq = rq;
+ 	}
+-	spin_unlock(&entity->rq_lock);
++	spin_unlock(&entity->lock);
+ 
+ 	if (entity->num_sched_list == 1)
+ 		entity->sched_list = NULL;
+@@ -599,9 +599,9 @@ void drm_sched_entity_push_job(struct dr
+ 		struct drm_sched_rq *rq;
+ 
+ 		/* Add the entity to the run queue */
+-		spin_lock(&entity->rq_lock);
++		spin_lock(&entity->lock);
+ 		if (entity->stopped) {
+-			spin_unlock(&entity->rq_lock);
++			spin_unlock(&entity->lock);
+ 
+ 			DRM_ERROR("Trying to push to a killed entity\n");
+ 			return;
+@@ -615,7 +615,7 @@ void drm_sched_entity_push_job(struct dr
+ 		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
+ 			drm_sched_rq_update_fifo_locked(entity, submit_ts);
+ 
+-		spin_unlock(&entity->rq_lock);
++		spin_unlock(&entity->lock);
+ 
+ 		drm_sched_wakeup(sched);
+ 	}
+--- a/drivers/gpu/drm/scheduler/sched_main.c
++++ b/drivers/gpu/drm/scheduler/sched_main.c
+@@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(str
+ 	 * for entity from within concurrent drm_sched_entity_select_rq and the
+ 	 * other to update the rb tree structure.
+ 	 */
+-	lockdep_assert_held(&entity->rq_lock);
++	lockdep_assert_held(&entity->lock);
+ 
+ 	spin_lock(&entity->rq->lock);
+ 
+--- a/include/drm/gpu_scheduler.h
++++ b/include/drm/gpu_scheduler.h
+@@ -97,13 +97,21 @@ struct drm_sched_entity {
+ 	struct list_head		list;
+ 
+ 	/**
++	 * @lock:
++	 *
++	 * Lock protecting the run-queue (@rq) to which this entity belongs,
++	 * @priority and the list of schedulers (@sched_list, @num_sched_list).
++	 */
++	spinlock_t			lock;
++
++	/**
+ 	 * @rq:
+ 	 *
+ 	 * Runqueue on which this entity is currently scheduled.
+ 	 *
+ 	 * FIXME: Locking is very unclear for this. Writers are protected by
+-	 * @rq_lock, but readers are generally lockless and seem to just race
+-	 * with not even a READ_ONCE.
++	 * @lock, but readers are generally lockless and seem to just race with
++	 * not even a READ_ONCE.
+ 	 */
+ 	struct drm_sched_rq		*rq;
+ 
+@@ -136,18 +144,11 @@ struct drm_sched_entity {
+ 	 * @priority:
+ 	 *
+ 	 * Priority of the entity. This can be modified by calling
+-	 * drm_sched_entity_set_priority(). Protected by &rq_lock.
++	 * drm_sched_entity_set_priority(). Protected by @lock.
+ 	 */
+ 	enum drm_sched_priority         priority;
+ 
+ 	/**
+-	 * @rq_lock:
+-	 *
+-	 * Lock to modify the runqueue to which this entity belongs.
+-	 */
+-	spinlock_t			rq_lock;
+-
+-	/**
+ 	 * @job_queue: the list of jobs of this entity.
+ 	 */
+ 	struct spsc_queue		job_queue;
diff --git a/queue-6.12/s390-disable-arch_want_optimize_hugetlb_vmemmap.patch b/queue-6.12/s390-disable-arch_want_optimize_hugetlb_vmemmap.patch
new file mode 100644
index 0000000000..7bece7fd96
--- /dev/null
+++ b/queue-6.12/s390-disable-arch_want_optimize_hugetlb_vmemmap.patch
@@ -0,0 +1,57 @@
+From stable+bounces-192140-greg=kroah.com@vger.kernel.org Mon Nov  3 11:24:25 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun,  2 Nov 2025 21:24:16 -0500
+Subject: s390: Disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+To: stable@vger.kernel.org
+Cc: Heiko Carstens <hca@linux.ibm.com>, Luiz Capitulino <luizcap@redhat.com>, Gerald Schaefer <gerald.schaefer@linux.ibm.com>, David Hildenbrand <david@redhat.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251103022416.3808769-1-sashal@kernel.org>
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+[ Upstream commit 64e2f60f355e556337fcffe80b9bcff1b22c9c42 ]
+
+As reported by Luiz Capitulino enabling HVO on s390 leads to reproducible
+crashes. The problem is that kernel page tables are modified without
+flushing corresponding TLB entries.
+
+Even if it looks like the empty flush_tlb_all() implementation on s390 is
+the problem, it is actually a different problem: on s390 it is not allowed
+to replace an active/valid page table entry with another valid page table
+entry without the detour over an invalid entry. A direct replacement may
+lead to random crashes and/or data corruption.
+
+In order to invalidate an entry special instructions have to be used
+(e.g. ipte or idte). Alternatively there are also special instructions
+available which allow to replace a valid entry with a different valid
+entry (e.g. crdte or cspg).
+
+Given that the HVO code currently does not provide the hooks to allow for
+an implementation which is compliant with the s390 architecture
+requirements, disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP again, which is
+basically a revert of the original patch which enabled it.
+
+Reported-by: Luiz Capitulino <luizcap@redhat.com>
+Closes: https://lore.kernel.org/all/20251028153930.37107-1-luizcap@redhat.com/
+Fixes: 00a34d5a99c0 ("s390: select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP")
+Cc: stable@vger.kernel.org
+Tested-by: Luiz Capitulino <luizcap@redhat.com>
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/Kconfig |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -135,7 +135,6 @@ config S390
+ 	select ARCH_WANT_IPC_PARSE_VERSION
+ 	select ARCH_WANT_KERNEL_PMD_MKWRITE
+ 	select ARCH_WANT_LD_ORPHAN_WARN
+-	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+ 	select BUILDTIME_TABLE_SORT
+ 	select CLONE_BACKWARDS2
+ 	select DCACHE_WORD_ACCESS if !KMSAN
diff --git a/queue-6.12/s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch b/queue-6.12/s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch
new file mode 100644
index 0000000000..fa25727f1f
--- /dev/null
+++ b/queue-6.12/s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch
@@ -0,0 +1,120 @@
+From stable+bounces-192141-greg=kroah.com@vger.kernel.org Mon Nov  3 11:24:26 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun,  2 Nov 2025 21:24:19 -0500
+Subject: s390/pci: Avoid deadlock between PCI error recovery and mlx5 crdump
+To: stable@vger.kernel.org
+Cc: Gerd Bayer <gbayer@linux.ibm.com>, Niklas Schnelle <schnelle@linux.ibm.com>, Heiko Carstens <hca@linux.ibm.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251103022419.3808824-1-sashal@kernel.org>
+
+From: Gerd Bayer <gbayer@linux.ibm.com>
+
+[ Upstream commit 0fd20f65df6aa430454a0deed8f43efa91c54835 ]
+
+Do not block PCI config accesses through pci_cfg_access_lock() when
+executing the s390 variant of PCI error recovery: Acquire just
+device_lock() instead of pci_dev_lock() as powerpc's EEH and
+generic PCI AER processing do.
+
+During error recovery testing a pair of tasks was reported to be hung:
+
+mlx5_core 0000:00:00.1: mlx5_health_try_recover:338:(pid 5553): health recovery flow aborted, PCI reads still not working
+INFO: task kmcheck:72 blocked for more than 122 seconds.
+      Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+task:kmcheck         state:D stack:0     pid:72    tgid:72    ppid:2      flags:0x00000000
+Call Trace:
+ [<000000065256f030>] __schedule+0x2a0/0x590
+ [<000000065256f356>] schedule+0x36/0xe0
+ [<000000065256f572>] schedule_preempt_disabled+0x22/0x30
+ [<0000000652570a94>] __mutex_lock.constprop.0+0x484/0x8a8
+ [<000003ff800673a4>] mlx5_unload_one+0x34/0x58 [mlx5_core]
+ [<000003ff8006745c>] mlx5_pci_err_detected+0x94/0x140 [mlx5_core]
+ [<0000000652556c5a>] zpci_event_attempt_error_recovery+0xf2/0x398
+ [<0000000651b9184a>] __zpci_event_error+0x23a/0x2c0
+INFO: task kworker/u1664:6:1514 blocked for more than 122 seconds.
+      Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+task:kworker/u1664:6 state:D stack:0     pid:1514  tgid:1514  ppid:2      flags:0x00000000
+Workqueue: mlx5_health0000:00:00.0 mlx5_fw_fatal_reporter_err_work [mlx5_core]
+Call Trace:
+ [<000000065256f030>] __schedule+0x2a0/0x590
+ [<000000065256f356>] schedule+0x36/0xe0
+ [<0000000652172e28>] pci_wait_cfg+0x80/0xe8
+ [<0000000652172f94>] pci_cfg_access_lock+0x74/0x88
+ [<000003ff800916b6>] mlx5_vsc_gw_lock+0x36/0x178 [mlx5_core]
+ [<000003ff80098824>] mlx5_crdump_collect+0x34/0x1c8 [mlx5_core]
+ [<000003ff80074b62>] mlx5_fw_fatal_reporter_dump+0x6a/0xe8 [mlx5_core]
+ [<0000000652512242>] devlink_health_do_dump.part.0+0x82/0x168
+ [<0000000652513212>] devlink_health_report+0x19a/0x230
+ [<000003ff80075a12>] mlx5_fw_fatal_reporter_err_work+0xba/0x1b0 [mlx5_core]
+
+No kernel log of the exact same error with an upstream kernel is
+available - but the very same deadlock situation can be constructed there,
+too:
+
+- task: kmcheck
+  mlx5_unload_one() tries to acquire devlink lock while the PCI error
+  recovery code has set pdev->block_cfg_access by way of
+  pci_cfg_access_lock()
+- task: kworker
+  mlx5_crdump_collect() tries to set block_cfg_access through
+  pci_cfg_access_lock() while devlink_health_report() had acquired
+  the devlink lock.
+
+A similar deadlock situation can be reproduced by requesting a
+crdump with
+  > devlink health dump show pci/<BDF> reporter fw_fatal
+
+while PCI error recovery is executed on the same <BDF> physical function
+by mlx5_core's pci_error_handlers. On s390 this can be injected with
+  > zpcictl --reset-fw <BDF>
+
+Tests with this patch failed to reproduce that second deadlock situation,
+the devlink command is rejected with "kernel answers: Permission denied" -
+and we get a kernel log message of:
+
+mlx5_core 1ed0:00:00.1: mlx5_crdump_collect:50:(pid 254382): crdump: failed to lock vsc gw err -5
+
+because the config read of VSC_SEMAPHORE is rejected by the underlying
+hardware.
+
+Two prior attempts to address this issue have been discussed and
+ultimately rejected [see link], with the primary argument that s390's
+implementation of PCI error recovery is imposing restrictions that
+neither powerpc's EEH nor PCI AER handling need. Tests show that PCI
+error recovery on s390 is running to completion even without blocking
+access to PCI config space.
+
+Link: https://lore.kernel.org/all/20251007144826.2825134-1-gbayer@linux.ibm.com/
+Cc: stable@vger.kernel.org
+Fixes: 4cdf2f4e24ff ("s390/pci: implement minimal PCI error recovery")
+Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/pci/pci_event.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/pci/pci_event.c
++++ b/arch/s390/pci/pci_event.c
+@@ -180,7 +180,7 @@ static pci_ers_result_t zpci_event_attem
+ 	 * is unbound or probed and that userspace can't access its
+ 	 * configuration space while we perform recovery.
+ 	 */
+-	pci_dev_lock(pdev);
++	device_lock(&pdev->dev);
+ 	if (pdev->error_state == pci_channel_io_perm_failure) {
+ 		ers_res = PCI_ERS_RESULT_DISCONNECT;
+ 		goto out_unlock;
+@@ -228,7 +228,7 @@ static pci_ers_result_t zpci_event_attem
+ 	if (driver->err_handler->resume)
+ 		driver->err_handler->resume(pdev);
+ out_unlock:
+-	pci_dev_unlock(pdev);
++	device_unlock(&pdev->dev);
+ 
+ 	return ers_res;
+ }
diff --git a/queue-6.12/series b/queue-6.12/series
index 5d8c9f8a31..d4200bcc83 100644
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -78,3 +78,10 @@ sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch
 cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch
 cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch
 mfd-kempld-switch-back-to-earlier-init-behavior.patch
+x86-cpu-amd-add-rdseed-fix-for-zen5.patch
+usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch
+drm-sched-optimise-drm_sched_entity_push_job.patch
+drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch
+drm-sched-fix-race-in-drm_sched_entity_select_rq.patch
+s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch
+s390-disable-arch_want_optimize_hugetlb_vmemmap.patch
diff --git a/queue-6.12/usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch b/queue-6.12/usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch
new file mode 100644
index 0000000000..fdd807226b
--- /dev/null
+++ b/queue-6.12/usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch
@@ -0,0 +1,54 @@
+From cfd6f1a7b42f62523c96d9703ef32b0dbc495ba4 Mon Sep 17 00:00:00 2001
+From: Owen Gu <guhuinan@xiaomi.com>
+Date: Mon, 15 Sep 2025 17:29:07 +0800
+Subject: usb: gadget: f_fs: Fix epfile null pointer access after ep enable.
+
+From: Owen Gu <guhuinan@xiaomi.com>
+
+commit cfd6f1a7b42f62523c96d9703ef32b0dbc495ba4 upstream.
+
+A race condition occurs when ffs_func_eps_enable() runs concurrently
+with ffs_data_reset(). The ffs_data_clear() called in ffs_data_reset()
+sets ffs->epfiles to NULL before resetting ffs->eps_count to 0, leading
+to a NULL pointer dereference when accessing epfile->ep in
+ffs_func_eps_enable() after successful usb_ep_enable().
+
+The ffs->epfiles pointer is set to NULL in both ffs_data_clear() and
+ffs_data_close() functions, and its modification is protected by the
+spinlock ffs->eps_lock. And the whole ffs_func_eps_enable() function
+is also protected by ffs->eps_lock.
+
+Thus, add NULL pointer handling for ffs->epfiles in the
+ffs_func_eps_enable() function to fix the issue.
+
+Signed-off-by: Owen Gu <guhuinan@xiaomi.com>
+Link: https://lore.kernel.org/r/20250915092907.17802-1-guhuinan@xiaomi.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/gadget/function/f_fs.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/usb/gadget/function/f_fs.c
++++ b/drivers/usb/gadget/function/f_fs.c
+@@ -2418,7 +2418,12 @@ static int ffs_func_eps_enable(struct ff
+ 	ep = func->eps;
+ 	epfile = ffs->epfiles;
+ 	count = ffs->eps_count;
+-	while(count--) {
++	if (!epfile) {
++		ret = -ENOMEM;
++		goto done;
++	}
++
++	while (count--) {
+ 		ep->ep->driver_data = ep;
+ 
+ 		ret = config_ep_by_speed(func->gadget, &func->function, ep->ep);
+@@ -2442,6 +2447,7 @@ static int ffs_func_eps_enable(struct ff
+ 	}
+ 
+ 	wake_up_interruptible(&ffs->wait);
++done:
+ 	spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
+ 
+ 	return ret;
diff --git a/queue-6.12/x86-cpu-amd-add-rdseed-fix-for-zen5.patch b/queue-6.12/x86-cpu-amd-add-rdseed-fix-for-zen5.patch
new file mode 100644
index 0000000000..84bbf1ff9e
--- /dev/null
+++ b/queue-6.12/x86-cpu-amd-add-rdseed-fix-for-zen5.patch
@@ -0,0 +1,75 @@
+From 607b9fb2ce248cc5b633c5949e0153838992c152 Mon Sep 17 00:00:00 2001
+From: Gregory Price <gourry@gourry.net>
+Date: Mon, 20 Oct 2025 11:13:55 +0200
+Subject: x86/CPU/AMD: Add RDSEED fix for Zen5
+
+From: Gregory Price <gourry@gourry.net>
+
+commit 607b9fb2ce248cc5b633c5949e0153838992c152 upstream.
+
+There's an issue with RDSEED's 16-bit and 32-bit register output
+variants on Zen5 which return a random value of 0 "at a rate inconsistent
+with randomness while incorrectly signaling success (CF=1)". Search the
+web for AMD-SB-7055 for more detail.
+
+Add a fix glue which checks microcode revisions.
+
+  [ bp: Add microcode revisions checking, rewrite. ]
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Gregory Price <gourry@gourry.net>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20251018024010.4112396-1-gourry@gourry.net
+[ bp: 6.12 backport: use the alternative microcode version checking. ]
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/amd.c |   35 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -1018,8 +1018,43 @@ static void init_amd_zen4(struct cpuinfo
+ 	}
+ }
+ 
++static bool check_rdseed_microcode(void)
++{
++	struct cpuinfo_x86 *c = &boot_cpu_data;
++	union zen_patch_rev p;
++	u32 min_rev = 0;
++
++	p.ext_fam	= c->x86 - 0xf;
++	p.model		= c->x86_model;
++	p.ext_model	= c->x86_model >> 4;
++	p.stepping	= c->x86_stepping;
++	/* reserved bits are expected to be 0 in test below */
++	p.__reserved	= 0;
++
++	if (cpu_has(c, X86_FEATURE_ZEN5)) {
++		switch (p.ucode_rev >> 8) {
++		case 0xb0021:	min_rev = 0xb00215a; break;
++		case 0xb1010:	min_rev = 0xb101054; break;
++		default:
++			pr_debug("%s: ucode_rev: 0x%x, current revision: 0x%x\n",
++				 __func__, p.ucode_rev, c->microcode);
++			return false;
++		}
++	}
++
++	if (!min_rev)
++		return false;
++
++	return c->microcode >= min_rev;
++}
++
+ static void init_amd_zen5(struct cpuinfo_x86 *c)
+ {
++	if (!check_rdseed_microcode()) {
++		clear_cpu_cap(c, X86_FEATURE_RDSEED);
++		msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18);
++		pr_emerg_once("RDSEED32 is broken. Disabling the corresponding CPUID bit.\n");
++	}
+ }
+ 
+ static void init_amd(struct cpuinfo_x86 *c)