--- /dev/null
+From stable+bounces-192197-greg=kroah.com@vger.kernel.org Mon Nov 3 21:47:10 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Nov 2025 07:44:50 -0500
+Subject: drm/sched: Fix race in drm_sched_entity_select_rq()
+To: stable@vger.kernel.org
+Cc: Philipp Stanner <phasta@kernel.org>, Tvrtko Ursulin <tvrtko.ursulin@igalia.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251103124450.4002293-3-sashal@kernel.org>
+
+From: Philipp Stanner <phasta@kernel.org>
+
+[ Upstream commit d25e3a610bae03bffc5c14b5d944a5d0cd844678 ]
+
+In a past bug fix it was forgotten that entity access must be protected
+by the entity lock. That's a data race and potentially UB.
+
+Move the spin_unlock() to the appropriate position.
+
+Cc: stable@vger.kernel.org # v5.13+
+Fixes: ac4eb83ab255 ("drm/sched: select new rq even if there is only one v3")
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Signed-off-by: Philipp Stanner <phasta@kernel.org>
+Link: https://patch.msgid.link/20251022063402.87318-2-phasta@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
++++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -558,10 +558,11 @@ void drm_sched_entity_select_rq(struct d
+ drm_sched_rq_remove_entity(entity->rq, entity);
+ entity->rq = rq;
+ }
+- spin_unlock(&entity->lock);
+
+ if (entity->num_sched_list == 1)
+ entity->sched_list = NULL;
++
++ spin_unlock(&entity->lock);
+ }
+
+ /**
--- /dev/null
+From stable+bounces-192195-greg=kroah.com@vger.kernel.org Mon Nov 3 21:51:05 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Nov 2025 07:44:48 -0500
+Subject: drm/sched: Optimise drm_sched_entity_push_job
+To: stable@vger.kernel.org
+Cc: "Tvrtko Ursulin" <tvrtko.ursulin@igalia.com>, "Christian König" <christian.koenig@amd.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Luben Tuikov" <ltuikov89@gmail.com>, "Matthew Brost" <matthew.brost@intel.com>, "Philipp Stanner" <pstanner@redhat.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251103124450.4002293-1-sashal@kernel.org>
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+[ Upstream commit d42a254633c773921884a19e8a1a0f53a31150c3 ]
+
+In FIFO mode (which is the default), both drm_sched_entity_push_job() and
+drm_sched_rq_update_fifo(), where the latter calls the former, are
+currently taking and releasing the same entity->rq_lock.
+
+We can avoid that design inelegance, and also have a minuscule
+efficiency improvement on the submit from idle path, by introducing a new
+drm_sched_rq_update_fifo_locked() helper and pulling up the lock taking to
+its callers.
+
+v2:
+ * Remove drm_sched_rq_update_fifo() altogether. (Christian)
+
+v3:
+ * Improved commit message. (Philipp)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Cc: Christian König <christian.koenig@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Luben Tuikov <ltuikov89@gmail.com>
+Cc: Matthew Brost <matthew.brost@intel.com>
+Cc: Philipp Stanner <pstanner@redhat.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Philipp Stanner <pstanner@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-2-tursulin@igalia.com
+Stable-dep-of: d25e3a610bae ("drm/sched: Fix race in drm_sched_entity_select_rq()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c | 13 +++++++++----
+ drivers/gpu/drm/scheduler/sched_main.c | 6 +++---
+ include/drm/gpu_scheduler.h | 2 +-
+ 3 files changed, 13 insertions(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
++++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -506,8 +506,12 @@ struct drm_sched_job *drm_sched_entity_p
+ struct drm_sched_job *next;
+
+ next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
+- if (next)
+- drm_sched_rq_update_fifo(entity, next->submit_ts);
++ if (next) {
++ spin_lock(&entity->rq_lock);
++ drm_sched_rq_update_fifo_locked(entity,
++ next->submit_ts);
++ spin_unlock(&entity->rq_lock);
++ }
+ }
+
+ /* Jobs and entities might have different lifecycles. Since we're
+@@ -607,10 +611,11 @@ void drm_sched_entity_push_job(struct dr
+ sched = rq->sched;
+
+ drm_sched_rq_add_entity(rq, entity);
+- spin_unlock(&entity->rq_lock);
+
+ if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
+- drm_sched_rq_update_fifo(entity, submit_ts);
++ drm_sched_rq_update_fifo_locked(entity, submit_ts);
++
++ spin_unlock(&entity->rq_lock);
+
+ drm_sched_wakeup(sched);
+ }
+--- a/drivers/gpu/drm/scheduler/sched_main.c
++++ b/drivers/gpu/drm/scheduler/sched_main.c
+@@ -169,14 +169,15 @@ static inline void drm_sched_rq_remove_f
+ }
+ }
+
+-void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts)
++void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts)
+ {
+ /*
+ * Both locks need to be grabbed, one to protect from entity->rq change
+ * for entity from within concurrent drm_sched_entity_select_rq and the
+ * other to update the rb tree structure.
+ */
+- spin_lock(&entity->rq_lock);
++ lockdep_assert_held(&entity->rq_lock);
++
+ spin_lock(&entity->rq->lock);
+
+ drm_sched_rq_remove_fifo_locked(entity);
+@@ -187,7 +188,6 @@ void drm_sched_rq_update_fifo(struct drm
+ drm_sched_entity_compare_before);
+
+ spin_unlock(&entity->rq->lock);
+- spin_unlock(&entity->rq_lock);
+ }
+
+ /**
+--- a/include/drm/gpu_scheduler.h
++++ b/include/drm/gpu_scheduler.h
+@@ -593,7 +593,7 @@ void drm_sched_rq_add_entity(struct drm_
+ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
+ struct drm_sched_entity *entity);
+
+-void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts);
++void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts);
+
+ int drm_sched_entity_init(struct drm_sched_entity *entity,
+ enum drm_sched_priority priority,
--- /dev/null
+From stable+bounces-192196-greg=kroah.com@vger.kernel.org Mon Nov 3 21:47:09 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Nov 2025 07:44:49 -0500
+Subject: drm/sched: Re-group and rename the entity run-queue lock
+To: stable@vger.kernel.org
+Cc: "Tvrtko Ursulin" <tvrtko.ursulin@igalia.com>, "Christian König" <christian.koenig@amd.com>, "Alex Deucher" <alexander.deucher@amd.com>, "Luben Tuikov" <ltuikov89@gmail.com>, "Matthew Brost" <matthew.brost@intel.com>, "Philipp Stanner" <pstanner@redhat.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20251103124450.4002293-2-sashal@kernel.org>
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+[ Upstream commit f93126f5d55920d1447ef00a3fbe6706f40f53de ]
+
+When writing to a drm_sched_entity's run-queue, writers are protected
+through the lock drm_sched_entity.rq_lock. This naming, however,
+frequently collides with the separate internal lock of struct
+drm_sched_rq, resulting in uses like this:
+
+ spin_lock(&entity->rq_lock);
+ spin_lock(&entity->rq->lock);
+
+Rename drm_sched_entity.rq_lock to improve readability. While at it,
+re-order that struct's members to make it more obvious what the lock
+protects.
+
+v2:
+ * Rename some rq_lock straddlers in kerneldoc, improve commit text. (Philipp)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Suggested-by: Christian König <christian.koenig@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: Luben Tuikov <ltuikov89@gmail.com>
+Cc: Matthew Brost <matthew.brost@intel.com>
+Cc: Philipp Stanner <pstanner@redhat.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+[pstanner: Fix typo in docstring]
+Signed-off-by: Philipp Stanner <pstanner@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-5-tursulin@igalia.com
+Stable-dep-of: d25e3a610bae ("drm/sched: Fix race in drm_sched_entity_select_rq()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/scheduler/sched_entity.c | 28 ++++++++++++++--------------
+ drivers/gpu/drm/scheduler/sched_main.c | 2 +-
+ include/drm/gpu_scheduler.h | 21 +++++++++++----------
+ 3 files changed, 26 insertions(+), 25 deletions(-)
+
+--- a/drivers/gpu/drm/scheduler/sched_entity.c
++++ b/drivers/gpu/drm/scheduler/sched_entity.c
+@@ -106,7 +106,7 @@ int drm_sched_entity_init(struct drm_sch
+ /* We start in an idle state. */
+ complete_all(&entity->entity_idle);
+
+- spin_lock_init(&entity->rq_lock);
++ spin_lock_init(&entity->lock);
+ spsc_queue_init(&entity->job_queue);
+
+ atomic_set(&entity->fence_seq, 0);
+@@ -134,10 +134,10 @@ void drm_sched_entity_modify_sched(struc
+ {
+ WARN_ON(!num_sched_list || !sched_list);
+
+- spin_lock(&entity->rq_lock);
++ spin_lock(&entity->lock);
+ entity->sched_list = sched_list;
+ entity->num_sched_list = num_sched_list;
+- spin_unlock(&entity->rq_lock);
++ spin_unlock(&entity->lock);
+ }
+ EXPORT_SYMBOL(drm_sched_entity_modify_sched);
+
+@@ -246,10 +246,10 @@ static void drm_sched_entity_kill(struct
+ if (!entity->rq)
+ return;
+
+- spin_lock(&entity->rq_lock);
++ spin_lock(&entity->lock);
+ entity->stopped = true;
+ drm_sched_rq_remove_entity(entity->rq, entity);
+- spin_unlock(&entity->rq_lock);
++ spin_unlock(&entity->lock);
+
+ /* Make sure this entity is not used by the scheduler at the moment */
+ wait_for_completion(&entity->entity_idle);
+@@ -395,9 +395,9 @@ static void drm_sched_entity_wakeup(stru
+ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
+ enum drm_sched_priority priority)
+ {
+- spin_lock(&entity->rq_lock);
++ spin_lock(&entity->lock);
+ entity->priority = priority;
+- spin_unlock(&entity->rq_lock);
++ spin_unlock(&entity->lock);
+ }
+ EXPORT_SYMBOL(drm_sched_entity_set_priority);
+
+@@ -507,10 +507,10 @@ struct drm_sched_job *drm_sched_entity_p
+
+ next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
+ if (next) {
+- spin_lock(&entity->rq_lock);
++ spin_lock(&entity->lock);
+ drm_sched_rq_update_fifo_locked(entity,
+ next->submit_ts);
+- spin_unlock(&entity->rq_lock);
++ spin_unlock(&entity->lock);
+ }
+ }
+
+@@ -551,14 +551,14 @@ void drm_sched_entity_select_rq(struct d
+ if (fence && !dma_fence_is_signaled(fence))
+ return;
+
+- spin_lock(&entity->rq_lock);
++ spin_lock(&entity->lock);
+ sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list);
+ rq = sched ? sched->sched_rq[entity->priority] : NULL;
+ if (rq != entity->rq) {
+ drm_sched_rq_remove_entity(entity->rq, entity);
+ entity->rq = rq;
+ }
+- spin_unlock(&entity->rq_lock);
++ spin_unlock(&entity->lock);
+
+ if (entity->num_sched_list == 1)
+ entity->sched_list = NULL;
+@@ -599,9 +599,9 @@ void drm_sched_entity_push_job(struct dr
+ struct drm_sched_rq *rq;
+
+ /* Add the entity to the run queue */
+- spin_lock(&entity->rq_lock);
++ spin_lock(&entity->lock);
+ if (entity->stopped) {
+- spin_unlock(&entity->rq_lock);
++ spin_unlock(&entity->lock);
+
+ DRM_ERROR("Trying to push to a killed entity\n");
+ return;
+@@ -615,7 +615,7 @@ void drm_sched_entity_push_job(struct dr
+ if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
+ drm_sched_rq_update_fifo_locked(entity, submit_ts);
+
+- spin_unlock(&entity->rq_lock);
++ spin_unlock(&entity->lock);
+
+ drm_sched_wakeup(sched);
+ }
+--- a/drivers/gpu/drm/scheduler/sched_main.c
++++ b/drivers/gpu/drm/scheduler/sched_main.c
+@@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(str
+ * for entity from within concurrent drm_sched_entity_select_rq and the
+ * other to update the rb tree structure.
+ */
+- lockdep_assert_held(&entity->rq_lock);
++ lockdep_assert_held(&entity->lock);
+
+ spin_lock(&entity->rq->lock);
+
+--- a/include/drm/gpu_scheduler.h
++++ b/include/drm/gpu_scheduler.h
+@@ -97,13 +97,21 @@ struct drm_sched_entity {
+ struct list_head list;
+
+ /**
++ * @lock:
++ *
++ * Lock protecting the run-queue (@rq) to which this entity belongs,
++ * @priority and the list of schedulers (@sched_list, @num_sched_list).
++ */
++ spinlock_t lock;
++
++ /**
+ * @rq:
+ *
+ * Runqueue on which this entity is currently scheduled.
+ *
+ * FIXME: Locking is very unclear for this. Writers are protected by
+- * @rq_lock, but readers are generally lockless and seem to just race
+- * with not even a READ_ONCE.
++ * @lock, but readers are generally lockless and seem to just race with
++ * not even a READ_ONCE.
+ */
+ struct drm_sched_rq *rq;
+
+@@ -136,18 +144,11 @@ struct drm_sched_entity {
+ * @priority:
+ *
+ * Priority of the entity. This can be modified by calling
+- * drm_sched_entity_set_priority(). Protected by &rq_lock.
++ * drm_sched_entity_set_priority(). Protected by @lock.
+ */
+ enum drm_sched_priority priority;
+
+ /**
+- * @rq_lock:
+- *
+- * Lock to modify the runqueue to which this entity belongs.
+- */
+- spinlock_t rq_lock;
+-
+- /**
+ * @job_queue: the list of jobs of this entity.
+ */
+ struct spsc_queue job_queue;
--- /dev/null
+From stable+bounces-192140-greg=kroah.com@vger.kernel.org Mon Nov 3 11:24:25 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 2 Nov 2025 21:24:16 -0500
+Subject: s390: Disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+To: stable@vger.kernel.org
+Cc: Heiko Carstens <hca@linux.ibm.com>, Luiz Capitulino <luizcap@redhat.com>, Gerald Schaefer <gerald.schaefer@linux.ibm.com>, David Hildenbrand <david@redhat.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251103022416.3808769-1-sashal@kernel.org>
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+[ Upstream commit 64e2f60f355e556337fcffe80b9bcff1b22c9c42 ]
+
+As reported by Luiz Capitulino enabling HVO on s390 leads to reproducible
+crashes. The problem is that kernel page tables are modified without
+flushing corresponding TLB entries.
+
+Even if it looks like the empty flush_tlb_all() implementation on s390 is
+the problem, it is actually a different problem: on s390 it is not allowed
+to replace an active/valid page table entry with another valid page table
+entry without the detour over an invalid entry. A direct replacement may
+lead to random crashes and/or data corruption.
+
+In order to invalidate an entry special instructions have to be used
+(e.g. ipte or idte). Alternatively there are also special instructions
+available which allow to replace a valid entry with a different valid
+entry (e.g. crdte or cspg).
+
+Given that the HVO code currently does not provide the hooks to allow for
+an implementation which is compliant with the s390 architecture
+requirements, disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP again, which is
+basically a revert of the original patch which enabled it.
+
+Reported-by: Luiz Capitulino <luizcap@redhat.com>
+Closes: https://lore.kernel.org/all/20251028153930.37107-1-luizcap@redhat.com/
+Fixes: 00a34d5a99c0 ("s390: select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP")
+Cc: stable@vger.kernel.org
+Tested-by: Luiz Capitulino <luizcap@redhat.com>
+Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/Kconfig | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -135,7 +135,6 @@ config S390
+ select ARCH_WANT_IPC_PARSE_VERSION
+ select ARCH_WANT_KERNEL_PMD_MKWRITE
+ select ARCH_WANT_LD_ORPHAN_WARN
+- select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+ select BUILDTIME_TABLE_SORT
+ select CLONE_BACKWARDS2
+ select DCACHE_WORD_ACCESS if !KMSAN
--- /dev/null
+From stable+bounces-192141-greg=kroah.com@vger.kernel.org Mon Nov 3 11:24:26 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 2 Nov 2025 21:24:19 -0500
+Subject: s390/pci: Avoid deadlock between PCI error recovery and mlx5 crdump
+To: stable@vger.kernel.org
+Cc: Gerd Bayer <gbayer@linux.ibm.com>, Niklas Schnelle <schnelle@linux.ibm.com>, Heiko Carstens <hca@linux.ibm.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20251103022419.3808824-1-sashal@kernel.org>
+
+From: Gerd Bayer <gbayer@linux.ibm.com>
+
+[ Upstream commit 0fd20f65df6aa430454a0deed8f43efa91c54835 ]
+
+Do not block PCI config accesses through pci_cfg_access_lock() when
+executing the s390 variant of PCI error recovery: Acquire just
+device_lock() instead of pci_dev_lock() as powerpc's EEH and
+generic PCI AER processing do.
+
+During error recovery testing a pair of tasks was reported to be hung:
+
+mlx5_core 0000:00:00.1: mlx5_health_try_recover:338:(pid 5553): health recovery flow aborted, PCI reads still not working
+INFO: task kmcheck:72 blocked for more than 122 seconds.
+ Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+task:kmcheck state:D stack:0 pid:72 tgid:72 ppid:2 flags:0x00000000
+Call Trace:
+ [<000000065256f030>] __schedule+0x2a0/0x590
+ [<000000065256f356>] schedule+0x36/0xe0
+ [<000000065256f572>] schedule_preempt_disabled+0x22/0x30
+ [<0000000652570a94>] __mutex_lock.constprop.0+0x484/0x8a8
+ [<000003ff800673a4>] mlx5_unload_one+0x34/0x58 [mlx5_core]
+ [<000003ff8006745c>] mlx5_pci_err_detected+0x94/0x140 [mlx5_core]
+ [<0000000652556c5a>] zpci_event_attempt_error_recovery+0xf2/0x398
+ [<0000000651b9184a>] __zpci_event_error+0x23a/0x2c0
+INFO: task kworker/u1664:6:1514 blocked for more than 122 seconds.
+ Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+task:kworker/u1664:6 state:D stack:0 pid:1514 tgid:1514 ppid:2 flags:0x00000000
+Workqueue: mlx5_health0000:00:00.0 mlx5_fw_fatal_reporter_err_work [mlx5_core]
+Call Trace:
+ [<000000065256f030>] __schedule+0x2a0/0x590
+ [<000000065256f356>] schedule+0x36/0xe0
+ [<0000000652172e28>] pci_wait_cfg+0x80/0xe8
+ [<0000000652172f94>] pci_cfg_access_lock+0x74/0x88
+ [<000003ff800916b6>] mlx5_vsc_gw_lock+0x36/0x178 [mlx5_core]
+ [<000003ff80098824>] mlx5_crdump_collect+0x34/0x1c8 [mlx5_core]
+ [<000003ff80074b62>] mlx5_fw_fatal_reporter_dump+0x6a/0xe8 [mlx5_core]
+ [<0000000652512242>] devlink_health_do_dump.part.0+0x82/0x168
+ [<0000000652513212>] devlink_health_report+0x19a/0x230
+ [<000003ff80075a12>] mlx5_fw_fatal_reporter_err_work+0xba/0x1b0 [mlx5_core]
+
+No kernel log of the exact same error with an upstream kernel is
+available - but the very same deadlock situation can be constructed there,
+too:
+
+- task: kmcheck
+ mlx5_unload_one() tries to acquire devlink lock while the PCI error
+ recovery code has set pdev->block_cfg_access by way of
+ pci_cfg_access_lock()
+- task: kworker
+ mlx5_crdump_collect() tries to set block_cfg_access through
+ pci_cfg_access_lock() while devlink_health_report() had acquired
+ the devlink lock.
+
+A similar deadlock situation can be reproduced by requesting a
+crdump with
+ > devlink health dump show pci/<BDF> reporter fw_fatal
+
+while PCI error recovery is executed on the same <BDF> physical function
+by mlx5_core's pci_error_handlers. On s390 this can be injected with
+ > zpcictl --reset-fw <BDF>
+
+Tests with this patch failed to reproduce that second deadlock situation,
+the devlink command is rejected with "kernel answers: Permission denied" -
+and we get a kernel log message of:
+
+mlx5_core 1ed0:00:00.1: mlx5_crdump_collect:50:(pid 254382): crdump: failed to lock vsc gw err -5
+
+because the config read of VSC_SEMAPHORE is rejected by the underlying
+hardware.
+
+Two prior attempts to address this issue have been discussed and
+ultimately rejected [see link], with the primary argument that s390's
+implementation of PCI error recovery is imposing restrictions that
+neither powerpc's EEH nor PCI AER handling need. Tests show that PCI
+error recovery on s390 is running to completion even without blocking
+access to PCI config space.
+
+Link: https://lore.kernel.org/all/20251007144826.2825134-1-gbayer@linux.ibm.com/
+Cc: stable@vger.kernel.org
+Fixes: 4cdf2f4e24ff ("s390/pci: implement minimal PCI error recovery")
+Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/pci/pci_event.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/pci/pci_event.c
++++ b/arch/s390/pci/pci_event.c
+@@ -180,7 +180,7 @@ static pci_ers_result_t zpci_event_attem
+ * is unbound or probed and that userspace can't access its
+ * configuration space while we perform recovery.
+ */
+- pci_dev_lock(pdev);
++ device_lock(&pdev->dev);
+ if (pdev->error_state == pci_channel_io_perm_failure) {
+ ers_res = PCI_ERS_RESULT_DISCONNECT;
+ goto out_unlock;
+@@ -228,7 +228,7 @@ static pci_ers_result_t zpci_event_attem
+ if (driver->err_handler->resume)
+ driver->err_handler->resume(pdev);
+ out_unlock:
+- pci_dev_unlock(pdev);
++ device_unlock(&pdev->dev);
+
+ return ers_res;
+ }
cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch
cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch
mfd-kempld-switch-back-to-earlier-init-behavior.patch
+x86-cpu-amd-add-rdseed-fix-for-zen5.patch
+usb-gadget-f_fs-fix-epfile-null-pointer-access-after-ep-enable.patch
+drm-sched-optimise-drm_sched_entity_push_job.patch
+drm-sched-re-group-and-rename-the-entity-run-queue-lock.patch
+drm-sched-fix-race-in-drm_sched_entity_select_rq.patch
+s390-pci-avoid-deadlock-between-pci-error-recovery-and-mlx5-crdump.patch
+s390-disable-arch_want_optimize_hugetlb_vmemmap.patch
--- /dev/null
+From cfd6f1a7b42f62523c96d9703ef32b0dbc495ba4 Mon Sep 17 00:00:00 2001
+From: Owen Gu <guhuinan@xiaomi.com>
+Date: Mon, 15 Sep 2025 17:29:07 +0800
+Subject: usb: gadget: f_fs: Fix epfile null pointer access after ep enable.
+
+From: Owen Gu <guhuinan@xiaomi.com>
+
+commit cfd6f1a7b42f62523c96d9703ef32b0dbc495ba4 upstream.
+
+A race condition occurs when ffs_func_eps_enable() runs concurrently
+with ffs_data_reset(). The ffs_data_clear() called in ffs_data_reset()
+sets ffs->epfiles to NULL before resetting ffs->eps_count to 0, leading
+to a NULL pointer dereference when accessing epfile->ep in
+ffs_func_eps_enable() after successful usb_ep_enable().
+
+The ffs->epfiles pointer is set to NULL in both ffs_data_clear() and
+ffs_data_close() functions, and its modification is protected by the
+spinlock ffs->eps_lock. And the whole ffs_func_eps_enable() function
+is also protected by ffs->eps_lock.
+
+Thus, add NULL pointer handling for ffs->epfiles in the
+ffs_func_eps_enable() function to fix the issue.
+
+Signed-off-by: Owen Gu <guhuinan@xiaomi.com>
+Link: https://lore.kernel.org/r/20250915092907.17802-1-guhuinan@xiaomi.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/gadget/function/f_fs.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/usb/gadget/function/f_fs.c
++++ b/drivers/usb/gadget/function/f_fs.c
+@@ -2418,7 +2418,12 @@ static int ffs_func_eps_enable(struct ff
+ ep = func->eps;
+ epfile = ffs->epfiles;
+ count = ffs->eps_count;
+- while(count--) {
++ if (!epfile) {
++ ret = -ENOMEM;
++ goto done;
++ }
++
++ while (count--) {
+ ep->ep->driver_data = ep;
+
+ ret = config_ep_by_speed(func->gadget, &func->function, ep->ep);
+@@ -2442,6 +2447,7 @@ static int ffs_func_eps_enable(struct ff
+ }
+
+ wake_up_interruptible(&ffs->wait);
++done:
+ spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
+
+ return ret;
--- /dev/null
+From 607b9fb2ce248cc5b633c5949e0153838992c152 Mon Sep 17 00:00:00 2001
+From: Gregory Price <gourry@gourry.net>
+Date: Mon, 20 Oct 2025 11:13:55 +0200
+Subject: x86/CPU/AMD: Add RDSEED fix for Zen5
+
+From: Gregory Price <gourry@gourry.net>
+
+commit 607b9fb2ce248cc5b633c5949e0153838992c152 upstream.
+
+There's an issue with RDSEED's 16-bit and 32-bit register output
+variants on Zen5 which return a random value of 0 "at a rate inconsistent
+with randomness while incorrectly signaling success (CF=1)". Search the
+web for AMD-SB-7055 for more detail.
+
+Add a fix glue which checks microcode revisions.
+
+ [ bp: Add microcode revisions checking, rewrite. ]
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Gregory Price <gourry@gourry.net>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20251018024010.4112396-1-gourry@gourry.net
+[ bp: 6.12 backport: use the alternative microcode version checking. ]
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/amd.c | 35 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -1018,8 +1018,43 @@ static void init_amd_zen4(struct cpuinfo
+ }
+ }
+
++static bool check_rdseed_microcode(void)
++{
++ struct cpuinfo_x86 *c = &boot_cpu_data;
++ union zen_patch_rev p;
++ u32 min_rev = 0;
++
++ p.ext_fam = c->x86 - 0xf;
++ p.model = c->x86_model;
++ p.ext_model = c->x86_model >> 4;
++ p.stepping = c->x86_stepping;
++ /* reserved bits are expected to be 0 in test below */
++ p.__reserved = 0;
++
++ if (cpu_has(c, X86_FEATURE_ZEN5)) {
++ switch (p.ucode_rev >> 8) {
++ case 0xb0021: min_rev = 0xb00215a; break;
++ case 0xb1010: min_rev = 0xb101054; break;
++ default:
++ pr_debug("%s: ucode_rev: 0x%x, current revision: 0x%x\n",
++ __func__, p.ucode_rev, c->microcode);
++ return false;
++ }
++ }
++
++ if (!min_rev)
++ return false;
++
++ return c->microcode >= min_rev;
++}
++
+ static void init_amd_zen5(struct cpuinfo_x86 *c)
+ {
++ if (!check_rdseed_microcode()) {
++ clear_cpu_cap(c, X86_FEATURE_RDSEED);
++ msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18);
++ pr_emerg_once("RDSEED32 is broken. Disabling the corresponding CPUID bit.\n");
++ }
+ }
+
+ static void init_amd(struct cpuinfo_x86 *c)