--- /dev/null
+From c9fdb67b2301d0ea95df7310796670a59113bad5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 12:26:32 +0200
+Subject: ARM: allow __do_kernel_fault() to report execution of memory faults
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+commit 40b466db1dffb41f0529035c59c5739636d0e5b8 upstream.
+
+Allow __do_kernel_fault() to detect the execution of memory, so we can
+provide the same fault message as do_page_fault() would do. This is
+required when we split the kernel address fault handling from the
+main do_page_fault() code path.
+
+Reviewed-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Tested-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mm/fault.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index 2315d40760a787..c94633eb64a1bb 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -172,6 +172,8 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+ */
+ if (addr < PAGE_SIZE) {
+ msg = "NULL pointer dereference";
++ } else if (is_permission_fault(fsr) && fsr & FSR_LNX_PF) {
++ msg = "execution of memory";
+ } else {
+ if (is_translation_fault(fsr) &&
+ kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
+--
+2.53.0
+
--- /dev/null
+From 1bbb126846de94b95bc4cb739ceb067c945455b3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 12:26:34 +0200
+Subject: ARM: fix branch predictor hardening
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+commit fd2dee1c6e2256f726ba33fd3083a7be0efc80d3 upstream.
+
+__do_user_fault() may be called with indeterminate interrupt enable
+state, which means we may be preemptible at this point. This causes
+problems when calling harden_branch_predictor(). For example, when
+called from a data abort, do_alignment_fault()->do_bad_area().
+
+Move harden_branch_predictor() out of __do_user_fault() and into the
+calling contexts.
+
+Moving it into do_kernel_address_page_fault(), we can be sure that
+interrupts will be disabled here.
+
+Converting do_translation_fault() to use do_kernel_address_page_fault()
+rather than do_bad_area() means that we keep branch predictor handling
+for translation faults. Interrupts will also be disabled at this call
+site.
+
+do_sect_fault() needs special handling, so detect user mode accesses
+to kernel-addresses, and add an explicit call to branch predictor
+hardening.
+
+Finally, add branch predictor hardening to do_alignment() for the
+faulting case (user mode accessing kernel addresses) before interrupts
+are enabled.
+
+This should cover all cases where harden_branch_predictor() is called,
+ensuring that it always has interrupts disabled, also ensuring that
+it is called early in each call path.
+
+Reviewed-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Tested-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mm/alignment.c | 4 ++++
+ arch/arm/mm/fault.c | 39 ++++++++++++++++++++++++++-------------
+ 2 files changed, 30 insertions(+), 13 deletions(-)
+
+diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
+index f8dd0b3cc8e040..ee264737be6d26 100644
+--- a/arch/arm/mm/alignment.c
++++ b/arch/arm/mm/alignment.c
+@@ -22,6 +22,7 @@
+
+ #include <asm/cp15.h>
+ #include <asm/system_info.h>
++#include <asm/system_misc.h>
+ #include <asm/unaligned.h>
+ #include <asm/opcodes.h>
+
+@@ -809,6 +810,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ int thumb2_32b = 0;
+ int fault;
+
++ if (addr >= TASK_SIZE && user_mode(regs))
++ harden_branch_predictor();
++
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index 907705992ab65f..d0681285dbda3e 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -195,9 +195,6 @@ __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
+ {
+ struct task_struct *tsk = current;
+
+- if (addr > TASK_SIZE)
+- harden_branch_predictor();
+-
+ #ifdef CONFIG_DEBUG_USER
+ if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
+ ((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
+@@ -248,8 +245,10 @@ do_kernel_address_page_fault(struct mm_struct *mm, unsigned long addr,
+ /*
+ * Fault from user mode for a kernel space address. User mode
+ * should not be faulting in kernel space, which includes the
+- * vector/khelper page. Send a SIGSEGV.
++ * vector/khelper page. Handle the branch predictor hardening
++ * while interrupts are still disabled, then send a SIGSEGV.
+ */
++ harden_branch_predictor();
+ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
+ } else {
+ /*
+@@ -419,16 +418,20 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ * We enter here because the first level page table doesn't contain
+ * a valid entry for the address.
+ *
+- * If the address is in kernel space (>= TASK_SIZE), then we are
+- * probably faulting in the vmalloc() area.
++ * If this is a user address (addr < TASK_SIZE), we handle this as a
++ * normal page fault. This leaves the remainder of the function to handle
++ * kernel address translation faults.
+ *
+- * If the init_task's first level page tables contains the relevant
+- * entry, we copy the it to this task. If not, we send the process
+- * a signal, fixup the exception, or oops the kernel.
++ * Since user mode is not permitted to access kernel addresses, pass these
++ * directly to do_kernel_address_page_fault() to handle.
+ *
+- * NOTE! We MUST NOT take any locks for this case. We may be in an
+- * interrupt or a critical region, and should only copy the information
+- * from the master page table, nothing more.
++ * Otherwise, we're probably faulting in the vmalloc() area, so try to fix
++ * that up. Note that we must not take any locks or enable interrupts in
++ * this case.
++ *
++ * If vmalloc() fixup fails, that means the non-leaf page tables did not
++ * contain an entry for this address, so handle this via
++ * do_kernel_address_page_fault().
+ */
+ #ifdef CONFIG_MMU
+ static int __kprobes
+@@ -494,7 +497,8 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+ return 0;
+
+ bad_area:
+- do_bad_area(addr, fsr, regs);
++ do_kernel_address_page_fault(current->mm, addr, fsr, regs);
++
+ return 0;
+ }
+ #else /* CONFIG_MMU */
+@@ -514,7 +518,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+ static int
+ do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ {
++ /*
++ * If this is a kernel address, but from user mode, then userspace
++ * is trying bad stuff. Invoke the branch predictor handling.
++ * Interrupts are disabled here.
++ */
++ if (addr >= TASK_SIZE && user_mode(regs))
++ harden_branch_predictor();
++
+ do_bad_area(addr, fsr, regs);
++
+ return 0;
+ }
+ #endif /* CONFIG_ARM_LPAE */
+--
+2.53.0
+
--- /dev/null
+From 3f5062f86cdd26f1ed4d1aed7a99c629bd5ea80c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 12:26:33 +0200
+Subject: ARM: fix hash_name() fault
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+commit 7733bc7d299d682f2723dc38fc7f370b9bf973e9 upstream.
+
+Zizhi Wo reports:
+
+"During the execution of hash_name()->load_unaligned_zeropad(), a
+ potential memory access beyond the PAGE boundary may occur. For
+ example, when the filename length is near the PAGE_SIZE boundary.
+ This triggers a page fault, which leads to a call to
+ do_page_fault()->mmap_read_trylock(). If we can't acquire the lock,
+ we have to fall back to the mmap_read_lock() path, which calls
+ might_sleep(). This breaks RCU semantics because path lookup occurs
+ under an RCU read-side critical section."
+
+This is seen with CONFIG_DEBUG_ATOMIC_SLEEP=y and CONFIG_KFENCE=y.
+
+Kernel addresses (with the exception of the vectors/kuser helper
+page) do not have VMAs associated with them. If the vectors/kuser
+helper page faults, then there are two possibilities:
+
+1. if the fault happened while in kernel mode, then we're basically
+ dead, because the CPU won't be able to vector through this page
+ to handle the fault.
+2. if the fault happened while in user mode, that means the page was
+ protected from user access, and we want to fault anyway.
+
+Thus, we can handle kernel addresses from any context entirely
+separately without going anywhere near the mmap lock. This gives us
+an entirely non-sleeping path for all kernel mode kernel address
+faults.
+
+As we handle the kernel address faults before interrupts are enabled,
+this change has the side effect of improving the branch predictor
+hardening, but does not completely solve the issue.
+
+Reported-by: Zizhi Wo <wozizhi@huaweicloud.com>
+Reported-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Link: https://lore.kernel.org/r/20251126090505.3057219-1-wozizhi@huaweicloud.com
+Reviewed-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Tested-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mm/fault.c | 35 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index c94633eb64a1bb..907705992ab65f 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -240,6 +240,35 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ #define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
+ #define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
+
++static int __kprobes
++do_kernel_address_page_fault(struct mm_struct *mm, unsigned long addr,
++ unsigned int fsr, struct pt_regs *regs)
++{
++ if (user_mode(regs)) {
++ /*
++ * Fault from user mode for a kernel space address. User mode
++ * should not be faulting in kernel space, which includes the
++ * vector/khelper page. Send a SIGSEGV.
++ */
++ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
++ } else {
++ /*
++ * Fault from kernel mode. Enable interrupts if they were
++ * enabled in the parent context. Section (upper page table)
++ * translation faults are handled via do_translation_fault(),
++ * so we will only get here for a non-present kernel space
++ * PTE or PTE permission fault. This may happen in exceptional
++ * circumstances and need the fixup tables to be walked.
++ */
++ if (interrupts_enabled(regs))
++ local_irq_enable();
++
++ __do_kernel_fault(mm, addr, fsr, regs);
++ }
++
++ return 0;
++}
++
+ static int __kprobes
+ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ {
+@@ -253,6 +282,12 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ if (kprobe_page_fault(regs, fsr))
+ return 0;
+
++ /*
++ * Handle kernel addresses faults separately, which avoids touching
++ * the mmap lock from contexts that are not able to sleep.
++ */
++ if (addr >= TASK_SIZE)
++ return do_kernel_address_page_fault(mm, addr, fsr, regs);
+
+ /* Enable interrupts if they were enabled in the parent context. */
+ if (interrupts_enabled(regs))
+--
+2.53.0
+
--- /dev/null
+From b2b73c84ad000c9ccaedbbfed4048b3d415b9ea5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 12:26:31 +0200
+Subject: ARM: group is_permission_fault() with is_translation_fault()
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+commit dea20281ac88226615761c570c8ff7adc18e6ac2 upstream.
+
+Group is_permission_fault() with is_translation_fault(), which is
+needed to use is_permission_fault() in __do_kernel_fault(). As
+this is static inline, there is no need for this to be under
+CONFIG_MMU.
+
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mm/fault.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index 16a7765511f8e4..2315d40760a787 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -125,6 +125,19 @@ static inline bool is_translation_fault(unsigned int fsr)
+ return false;
+ }
+
++static inline bool is_permission_fault(unsigned int fsr)
++{
++ int fs = fsr_fs(fsr);
++#ifdef CONFIG_ARM_LPAE
++ if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL)
++ return true;
++#else
++ if (fs == FS_L1_PERM || fs == FS_L2_PERM)
++ return true;
++#endif
++ return false;
++}
++
+ static void die_kernel_fault(const char *msg, struct mm_struct *mm,
+ unsigned long addr, unsigned int fsr,
+ struct pt_regs *regs)
+@@ -225,19 +238,6 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ #define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
+ #define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
+
+-static inline bool is_permission_fault(unsigned int fsr)
+-{
+- int fs = fsr_fs(fsr);
+-#ifdef CONFIG_ARM_LPAE
+- if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL)
+- return true;
+-#else
+- if (fs == FS_L1_PERM || fs == FS_L2_PERM)
+- return true;
+-#endif
+- return false;
+-}
+-
+ static int __kprobes
+ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ {
+--
+2.53.0
+
--- /dev/null
+From 4c2c9ce1c285ffecb3dfdf1491191d8030426121 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:53 +0200
+Subject: debugobjects: Allow to refill the pool before SYSTEM_SCHEDULING
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 06e0ae988f6e3499785c407429953ade19c1096b upstream.
+
+The pool of free objects is refilled on several occasions such as object
+initialisation. On PREEMPT_RT refilling is limited to preemptible
+sections due to sleeping locks used by the memory allocator. The system
+boots with disabled interrupts so the pool can not be refilled.
+
+If too many objects are initialized and the pool gets empty then
+debugobjects disables itself.
+
+Refilling can also happen early in the boot with disabled interrupts as
+long as the scheduler is not operational. If the scheduler can not
+preempt a task then a sleeping lock can not be contended.
+
+Allow to additionally refill the pool if the scheduler is not
+operational.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251127153652.291697-2-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 46eabbae69ccfc..bb5c909458535b 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -605,7 +605,7 @@ static void debug_objects_fill_pool(void)
+ * raw_spinlock_t are basically the same type and this lock-type
+ * inversion works just fine.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to WAIT_SLEEP, matching
+--
+2.53.0
+
--- /dev/null
+From fc1b7b54a02c62650fce4a16e202e37742e04bb7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:56:02 +0200
+Subject: debugobjects: Do not fill_pool() if pi_blocked_on
+
+From: Helen Koike <koike@igalia.com>
+
+commit 5f41161059fd0f1bbf18c90f3180e38cc45a14eb upstream.
+
+On RT enabled kernels, fill_pool() ends up calling rtlock_lock(), which
+asserts if current::pi_blocked_on is set, because a task can obviously only
+block on one lock as otherwise the priority inheritance chain gets
+corrupted.
+
+Prevent this by expanding the conditional to take current::pi_blocked_on
+into account.
+
+Fixes: 4bedcc28469a ("debugobjects: Make them PREEMPT_RT aware")
+Reported-by: syzbot+b8ca586b9fc235f0c0df@syzkaller.appspotmail.com
+Signed-off-by: Helen Koike <koike@igalia.com>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Link: https://patch.msgid.link/20260511215359.3351259-1-koike@igalia.com
+Closes: https://syzkaller.appspot.com/bug?extid=b8ca586b9fc235f0c0df
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index a7f3c6f15125a9..5e653e2ffa8dac 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -597,15 +597,25 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket
+ return NULL;
+ }
+
++static inline bool debug_objects_is_pi_blocked_on(void)
++{
++#ifdef CONFIG_RT_MUTEXES
++ return current->pi_blocked_on != NULL;
++#else
++ return false;
++#endif
++}
++
+ static void debug_objects_fill_pool(void)
+ {
+ /*
+ * On RT enabled kernels the pool refill must happen in preemptible
+- * context -- for !RT kernels we rely on the fact that spinlock_t and
+- * raw_spinlock_t are basically the same type and this lock-type
+- * inversion works just fine.
++ * context and not enqueued on an rt_mutex -- for !RT kernels we rely
++ * on the fact that spinlock_t and raw_spinlock_t are basically the
++ * same type and this lock-type inversion works just fine.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || system_state < SYSTEM_SCHEDULING ||
++ (preemptible() && !debug_objects_is_pi_blocked_on())) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+--
+2.53.0
+
--- /dev/null
+From 3eb961e2cde12b5c17ca9cc4d08ca32b7d04f19d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:56:07 +0200
+Subject: debugobjects: Dont call fill_pool() in early boot hardirq context
+
+From: Waiman Long <longman@redhat.com>
+
+commit 0d046ae106255cba5eb83b23f78ee93f3620247d upstream.
+
+When booting a debug PREEMPT_RT kernel on an ARM64 system, an "inconsistent
+{HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage" lockdep warning message was
+reported to the console.
+
+During early boot, interrupts are enabled before the scheduler is
+enabled. In this window (before SYSTEM_SCHEDULING is set) interrupts can
+fire and the hard interrupt context handler may attempt to fill the pool.
+
+This can lead to a deadlock when the interrupt hits a region which holds
+a lock that is required to be taken in the allocation path.
+
+Add a new can_fill_pool() helper and reorder the exception rule and forbid
+this scenario by excluding allocations from hard interrupt context.
+
+Fixes: 06e0ae988f6e ("debugobjects: Allow to refill the pool before SYSTEM_SCHEDULING")
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260605173038.495075-1-longman@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 44 ++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 36 insertions(+), 8 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 5e653e2ffa8dac..ef1fea990df70a 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -606,20 +606,48 @@ static inline bool debug_objects_is_pi_blocked_on(void)
+ #endif
+ }
+
+-static void debug_objects_fill_pool(void)
++static inline bool can_fill_pool(void)
+ {
+ /*
+- * On RT enabled kernels the pool refill must happen in preemptible
+- * context and not enqueued on an rt_mutex -- for !RT kernels we rely
+- * on the fact that spinlock_t and raw_spinlock_t are basically the
+- * same type and this lock-type inversion works just fine.
++ * On !RT enabled kernels there are no restrictions and spinlock_t and
++ * raw_spinlock_t are the same types.
++ */
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ return true;
++
++ /*
++ * On RT enabled kernels, the task must not be blocked on a lock as
++ * that could corrupt the PI state when blocking on a lock in the
++ * allocation path.
++ */
++ if (debug_objects_is_pi_blocked_on())
++ return false;
++
++ /*
++ * On RT enabled kernels the pool refill should happen in preemptible
++ * context.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || system_state < SYSTEM_SCHEDULING ||
+- (preemptible() && !debug_objects_is_pi_blocked_on())) {
++ if (preemptible())
++ return true;
++
++ /*
++ * Though during system boot before scheduling is set up, preemption is
++ * disabled and the pool can get exhausted. Before scheduling is active
++ * a task cannot be blocked on a sleeping lock, but it might hold a lock
++ * and if interrupted then hard interrupt context might run into a lock
++ * inversion. So exclude hard interrupt context from allocations before
++ * scheduling is active.
++ */
++ return system_state < SYSTEM_SCHEDULING && !in_hardirq();
++}
++
++static void debug_objects_fill_pool(void)
++{
++ if (can_fill_pool()) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+- * the preemptible() condition above.
++ * the preemptible() condition in can_fill_pool().
+ */
+ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);
+ lock_map_acquire_try(&fill_pool_map);
+--
+2.53.0
+
--- /dev/null
+From aa674b172bc3fa6f2e17a2e80139e4ac8c0a76bc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:49 +0200
+Subject: debugobjects,locking: Annotate debug_object_fill_pool() wait type
+ violation
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0cce06ba859a515bd06224085d3addb870608b6d upstream.
+
+There is an explicit wait-type violation in debug_object_fill_pool()
+for PREEMPT_RT=n kernels which allows them to more easily fill the
+object pool and reduce the chance of allocation failures.
+
+Lockdep's wait-type checks are designed to check the PREEMPT_RT
+locking rules even for PREEMPT_RT=n kernels and object to this, so
+create a lockdep annotation to allow this to stand.
+
+Specifically, create a 'lock' type that overrides the inner wait-type
+while it is held -- allowing one to temporarily raise it, such that
+the violation is hidden.
+
+Reported-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Qi Zheng <zhengqi.arch@bytedance.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Qi Zheng <zhengqi.arch@bytedance.com>
+Link: https://lkml.kernel.org/r/20230429100614.GA1489784@hirez.programming.kicks-ass.net
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/lockdep.h | 14 ++++++++++++++
+ include/linux/lockdep_types.h | 1 +
+ kernel/locking/lockdep.c | 28 +++++++++++++++++++++-------
+ lib/debugobjects.c | 15 +++++++++++++--
+ 4 files changed, 49 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
+index 43d8734ac0eb0b..90aa802a30669c 100644
+--- a/include/linux/lockdep.h
++++ b/include/linux/lockdep.h
+@@ -339,6 +339,16 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
+ #define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c))
+ #define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c))
+
++/*
++ * Must use lock_map_aquire_try() with override maps to avoid
++ * lockdep thinking they participate in the block chain.
++ */
++#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \
++ struct lockdep_map _name = { \
++ .name = #_name "-wait-type-override", \
++ .wait_type_inner = _wait_type, \
++ .lock_type = LD_LOCK_WAIT_OVERRIDE, }
++
+ #else /* !CONFIG_LOCKDEP */
+
+ static inline void lockdep_init_task(struct task_struct *task)
+@@ -427,6 +437,9 @@ extern int lockdep_is_held(const void *);
+ #define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0)
+ #define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0)
+
++#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \
++ struct lockdep_map __maybe_unused _name = {}
++
+ #endif /* !LOCKDEP */
+
+ enum xhlock_context_t {
+@@ -552,6 +565,7 @@ do { \
+ #define rwsem_release(l, i) lock_release(l, i)
+
+ #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
++#define lock_map_acquire_try(l) lock_acquire_exclusive(l, 0, 1, NULL, _THIS_IP_)
+ #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
+ #define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
+ #define lock_map_release(l) lock_release(l, _THIS_IP_)
+diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
+index d22430840b53f9..59f4fb1626ea60 100644
+--- a/include/linux/lockdep_types.h
++++ b/include/linux/lockdep_types.h
+@@ -33,6 +33,7 @@ enum lockdep_wait_type {
+ enum lockdep_lock_type {
+ LD_LOCK_NORMAL = 0, /* normal, catch all */
+ LD_LOCK_PERCPU, /* percpu */
++ LD_LOCK_WAIT_OVERRIDE, /* annotation */
+ LD_LOCK_MAX,
+ };
+
+diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
+index 5f8ce961cd9a3f..463834778b4b04 100644
+--- a/kernel/locking/lockdep.c
++++ b/kernel/locking/lockdep.c
+@@ -2245,6 +2245,9 @@ static inline bool usage_match(struct lock_list *entry, void *mask)
+
+ static inline bool usage_skip(struct lock_list *entry, void *mask)
+ {
++ if (entry->class->lock_type == LD_LOCK_NORMAL)
++ return false;
++
+ /*
+ * Skip local_lock() for irq inversion detection.
+ *
+@@ -2271,14 +2274,16 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
+ * As a result, we will skip local_lock(), when we search for irq
+ * inversion bugs.
+ */
+- if (entry->class->lock_type == LD_LOCK_PERCPU) {
+- if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
+- return false;
++ if (entry->class->lock_type == LD_LOCK_PERCPU &&
++ DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
++ return false;
+
+- return true;
+- }
++ /*
++ * Skip WAIT_OVERRIDE for irq inversion detection -- it's not actually
++ * a lock and only used to override the wait_type.
++ */
+
+- return false;
++ return true;
+ }
+
+ /*
+@@ -4745,7 +4750,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
+
+ for (; depth < curr->lockdep_depth; depth++) {
+ struct held_lock *prev = curr->held_locks + depth;
+- u8 prev_inner = hlock_class(prev)->wait_type_inner;
++ struct lock_class *class = hlock_class(prev);
++ u8 prev_inner = class->wait_type_inner;
+
+ if (prev_inner) {
+ /*
+@@ -4755,6 +4761,14 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
+ * Also due to trylocks.
+ */
+ curr_inner = min(curr_inner, prev_inner);
++
++ /*
++ * Allow override for annotations -- this is typically
++ * only valid/needed for code that only exists when
++ * CONFIG_PREEMPT_RT=n.
++ */
++ if (unlikely(class->lock_type == LD_LOCK_WAIT_OVERRIDE))
++ curr_inner = prev_inner;
+ }
+ }
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 1e193a5f6b4a72..46eabbae69ccfc 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -601,10 +601,21 @@ static void debug_objects_fill_pool(void)
+ {
+ /*
+ * On RT enabled kernels the pool refill must happen in preemptible
+- * context:
++ * context -- for !RT kernels we rely on the fact that spinlock_t and
++ * raw_spinlock_t are basically the same type and this lock-type
++ * inversion works just fine.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
++ /*
++ * Annotate away the spinlock_t inside raw_spinlock_t warning
++ * by temporarily raising the wait-type to WAIT_SLEEP, matching
++ * the preemptible() condition above.
++ */
++ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP);
++ lock_map_acquire_try(&fill_pool_map);
+ fill_pool();
++ lock_map_release(&fill_pool_map);
++ }
+ }
+
+ static void
+--
+2.53.0
+
--- /dev/null
+From 4af4bc44d340a7c542af22a2ae2c2aef2c5fa440 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:57 +0200
+Subject: debugobjects: Use LD_WAIT_CONFIG instead of LD_WAIT_SLEEP
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 37de2dbc318ee10577c1c2704de5a803e75e55a2 upstream.
+
+fill_pool_map is used to suppress nesting violations caused by acquiring
+a spinlock_t (from within the memory allocator) while holding a
+raw_spinlock_t. The used annotation is wrong.
+
+LD_WAIT_SLEEP is for always sleeping lock types such as mutex_t.
+LD_WAIT_CONFIG is for lock type which are sleeping while spinning on
+PREEMPT_RT such as spinlock_t.
+
+Use LD_WAIT_CONFIG as override.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251127153652.291697-3-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index bb5c909458535b..a7f3c6f15125a9 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -608,10 +608,10 @@ static void debug_objects_fill_pool(void)
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+- * by temporarily raising the wait-type to WAIT_SLEEP, matching
++ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+ * the preemptible() condition above.
+ */
+- static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP);
++ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);
+ lock_map_acquire_try(&fill_pool_map);
+ fill_pool();
+ lock_map_release(&fill_pool_map);
+--
+2.53.0
+
kvm-nvmx-check-for-pending-posted-interrupts-when-lo.patch
kvm-nvmx-fold-requested-virtual-interrupt-check-into.patch
net-annotate-data-races-around-sk-sk_-data_ready-wri.patch
+debugobjects-locking-annotate-debug_object_fill_pool.patch
+debugobjects-allow-to-refill-the-pool-before-system_.patch
+debugobjects-use-ld_wait_config-instead-of-ld_wait_s.patch
+debugobjects-do-not-fill_pool-if-pi_blocked_on.patch
+debugobjects-dont-call-fill_pool-in-early-boot-hardi.patch
+arm-group-is_permission_fault-with-is_translation_fa.patch
+arm-allow-__do_kernel_fault-to-report-execution-of-m.patch
+arm-fix-hash_name-fault.patch
+arm-fix-branch-predictor-hardening.patch
--- /dev/null
+From 12b5d1a683efb3c8108cc3207313db1eb1282b25 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:12 +0200
+Subject: debugobjects: Allow to refill the pool before SYSTEM_SCHEDULING
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 06e0ae988f6e3499785c407429953ade19c1096b upstream.
+
+The pool of free objects is refilled on several occasions such as object
+initialisation. On PREEMPT_RT refilling is limited to preemptible
+sections due to sleeping locks used by the memory allocator. The system
+boots with disabled interrupts so the pool can not be refilled.
+
+If too many objects are initialized and the pool gets empty then
+debugobjects disables itself.
+
+Refilling can also happen early in the boot with disabled interrupts as
+long as the scheduler is not operational. If the scheduler can not
+preempt a task then a sleeping lock can not be contended.
+
+Allow to additionally refill the pool if the scheduler is not
+operational.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251127153652.291697-2-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 932e2d8dbd9b9b..d69721bb78b797 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -604,7 +604,7 @@ static void debug_objects_fill_pool(void)
+ * raw_spinlock_t are basically the same type and this lock-type
+ * inversion works just fine.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to WAIT_SLEEP, matching
+--
+2.53.0
+
--- /dev/null
+From 8c61c8d212c0f6e38df746af46593ca893b6bf28 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:21 +0200
+Subject: debugobjects: Do not fill_pool() if pi_blocked_on
+
+From: Helen Koike <koike@igalia.com>
+
+commit 5f41161059fd0f1bbf18c90f3180e38cc45a14eb upstream.
+
+On RT enabled kernels, fill_pool() ends up calling rtlock_lock(), which
+asserts if current::pi_blocked_on is set, because a task can obviously only
+block on one lock as otherwise the priority inheritance chain gets
+corrupted.
+
+Prevent this by expanding the conditional to take current::pi_blocked_on
+into account.
+
+Fixes: 4bedcc28469a ("debugobjects: Make them PREEMPT_RT aware")
+Reported-by: syzbot+b8ca586b9fc235f0c0df@syzkaller.appspotmail.com
+Signed-off-by: Helen Koike <koike@igalia.com>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Link: https://patch.msgid.link/20260511215359.3351259-1-koike@igalia.com
+Closes: https://syzkaller.appspot.com/bug?extid=b8ca586b9fc235f0c0df
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 628fe54d927ecb..c1b8b754572caa 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -596,15 +596,25 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket
+ return NULL;
+ }
+
++static inline bool debug_objects_is_pi_blocked_on(void)
++{
++#ifdef CONFIG_RT_MUTEXES
++ return current->pi_blocked_on != NULL;
++#else
++ return false;
++#endif
++}
++
+ static void debug_objects_fill_pool(void)
+ {
+ /*
+ * On RT enabled kernels the pool refill must happen in preemptible
+- * context -- for !RT kernels we rely on the fact that spinlock_t and
+- * raw_spinlock_t are basically the same type and this lock-type
+- * inversion works just fine.
++ * context and not enqueued on an rt_mutex -- for !RT kernels we rely
++ * on the fact that spinlock_t and raw_spinlock_t are basically the
++ * same type and this lock-type inversion works just fine.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || system_state < SYSTEM_SCHEDULING ||
++ (preemptible() && !debug_objects_is_pi_blocked_on())) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+--
+2.53.0
+
--- /dev/null
+From 813c2352f2652717378c8f1b4a18b99dafd72fcc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:26 +0200
+Subject: debugobjects: Dont call fill_pool() in early boot hardirq context
+
+From: Waiman Long <longman@redhat.com>
+
+commit 0d046ae106255cba5eb83b23f78ee93f3620247d upstream.
+
+When booting a debug PREEMPT_RT kernel on an ARM64 system, an "inconsistent
+{HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage" lockdep warning message was
+reported to the console.
+
+During early boot, interrupts are enabled before the scheduler is
+enabled. In this window (before SYSTEM_SCHEDULING is set) interrupts can
+fire and the hard interrupt context handler can attempt to fill the pool.
+
+This can lead to a deadlock when the interrupt hits a region which holds
+a lock that is required to be taken in the allocation path.
+
+Add a new can_fill_pool() helper and reorder the exception rule and forbid
+this scenario by excluding allocations from hard interrupt context.
+
+Fixes: 06e0ae988f6e ("debugobjects: Allow to refill the pool before SYSTEM_SCHEDULING")
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260605173038.495075-1-longman@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 44 ++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 36 insertions(+), 8 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index c1b8b754572caa..7abd909c8076af 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -605,20 +605,48 @@ static inline bool debug_objects_is_pi_blocked_on(void)
+ #endif
+ }
+
+-static void debug_objects_fill_pool(void)
++static inline bool can_fill_pool(void)
+ {
+ /*
+- * On RT enabled kernels the pool refill must happen in preemptible
+- * context and not enqueued on an rt_mutex -- for !RT kernels we rely
+- * on the fact that spinlock_t and raw_spinlock_t are basically the
+- * same type and this lock-type inversion works just fine.
++ * On !RT enabled kernels there are no restrictions and spinlock_t and
++ * raw_spinlock_t are the same types.
++ */
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ return true;
++
++ /*
++ * On RT enabled kernels, the task must not be blocked on a lock as
++ * that could corrupt the PI state when blocking on a lock in the
++ * allocation path.
++ */
++ if (debug_objects_is_pi_blocked_on())
++ return false;
++
++ /*
++ * On RT enabled kernels the pool refill should happen in preemptible
++ * context.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || system_state < SYSTEM_SCHEDULING ||
+- (preemptible() && !debug_objects_is_pi_blocked_on())) {
++ if (preemptible())
++ return true;
++
++ /*
++ * Though during system boot before scheduling is set up, preemption is
++ * disabled and the pool can get exhausted. Before scheduling is active
++ * a task cannot be blocked on a sleeping lock, but it might hold a lock
++ * and if interrupted then hard interrupt context might run into a lock
++ * inversion. So exclude hard interrupt context from allocations before
++ * scheduling is active.
++ */
++ return system_state < SYSTEM_SCHEDULING && !in_hardirq();
++}
++
++static void debug_objects_fill_pool(void)
++{
++ if (can_fill_pool()) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+- * the preemptible() condition above.
++ * the preemptible() condition in can_fill_pool().
+ */
+ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);
+ lock_map_acquire_try(&fill_pool_map);
+--
+2.53.0
+
--- /dev/null
+From 884c8d325e8c6dd26cc2ae32010fb69e5cad868d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:17 +0200
+Subject: debugobjects: Use LD_WAIT_CONFIG instead of LD_WAIT_SLEEP
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 37de2dbc318ee10577c1c2704de5a803e75e55a2 upstream.
+
+fill_pool_map is used to suppress nesting violations caused by acquiring
+a spinlock_t (from within the memory allocator) while holding a
+raw_spinlock_t. The used annotation is wrong.
+
+LD_WAIT_SLEEP is for always sleeping lock types such as mutex_t.
+LD_WAIT_CONFIG is for lock types which spin on !PREEMPT_RT but sleep on
+PREEMPT_RT, such as spinlock_t.
+
+Use LD_WAIT_CONFIG as override.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251127153652.291697-3-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index d69721bb78b797..628fe54d927ecb 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -607,10 +607,10 @@ static void debug_objects_fill_pool(void)
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+- * by temporarily raising the wait-type to WAIT_SLEEP, matching
++ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+ * the preemptible() condition above.
+ */
+- static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP);
++ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);
+ lock_map_acquire_try(&fill_pool_map);
+ fill_pool();
+ lock_map_release(&fill_pool_map);
+--
+2.53.0
+
net-drop-the-lock-in-skb_may_tx_timestamp.patch
ip6_vti-set-netns_immutable-on-the-fallback-device.patch
reapply-selftest-ptp-update-ptp-selftest-to-exercise-the-gettimex-options.patch
+debugobjects-allow-to-refill-the-pool-before-system_.patch
+debugobjects-use-ld_wait_config-instead-of-ld_wait_s.patch
+debugobjects-do-not-fill_pool-if-pi_blocked_on.patch
+debugobjects-dont-call-fill_pool-in-early-boot-hardi.patch
--- /dev/null
+From 3d6f64a6578fbc098b21153b91241d1405c9c35d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:54:38 +0200
+Subject: debugobjects: Allow to refill the pool before SYSTEM_SCHEDULING
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 06e0ae988f6e3499785c407429953ade19c1096b upstream.
+
+The pool of free objects is refilled on several occasions such as object
+initialisation. On PREEMPT_RT refilling is limited to preemptible
+sections due to sleeping locks used by the memory allocator. The system
+boots with disabled interrupts so the pool can not be refilled.
+
+If too many objects are initialized and the pool gets empty then
+debugobjects disables itself.
+
+Refilling can also happen early in the boot with disabled interrupts as
+long as the scheduler is not operational. If the scheduler can not
+preempt a task then a sleeping lock can not be contended.
+
+Allow to additionally refill the pool if the scheduler is not
+operational.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251127153652.291697-2-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index e4b7f77ece3b4f..9d59b797d1b507 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -731,7 +731,7 @@ static void debug_objects_fill_pool(void)
+ * raw_spinlock_t are basically the same type and this lock-type
+ * inversion works just fine.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to WAIT_SLEEP, matching
+--
+2.53.0
+
--- /dev/null
+From a2727de7629c871fac17773b06f044f0e97feeba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:54:46 +0200
+Subject: debugobjects: Do not fill_pool() if pi_blocked_on
+
+From: Helen Koike <koike@igalia.com>
+
+commit 5f41161059fd0f1bbf18c90f3180e38cc45a14eb upstream.
+
+On RT enabled kernels, fill_pool() ends up calling rtlock_lock(), which
+asserts if current::pi_blocked_on is set, because a task can obviously only
+block on one lock as otherwise the priority inheritance chain gets
+corrupted.
+
+Prevent this by expanding the conditional to take current::pi_blocked_on
+into account.
+
+Fixes: 4bedcc28469a ("debugobjects: Make them PREEMPT_RT aware")
+Reported-by: syzbot+b8ca586b9fc235f0c0df@syzkaller.appspotmail.com
+Signed-off-by: Helen Koike <koike@igalia.com>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Link: https://patch.msgid.link/20260511215359.3351259-1-koike@igalia.com
+Closes: https://syzkaller.appspot.com/bug?extid=b8ca586b9fc235f0c0df
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 4343dc5e5c99da..cbd025dae5ce92 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -711,6 +711,15 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket
+ return NULL;
+ }
+
++static inline bool debug_objects_is_pi_blocked_on(void)
++{
++#ifdef CONFIG_RT_MUTEXES
++ return current->pi_blocked_on != NULL;
++#else
++ return false;
++#endif
++}
++
+ static void debug_objects_fill_pool(void)
+ {
+ if (!static_branch_likely(&obj_cache_enabled))
+@@ -727,11 +736,12 @@ static void debug_objects_fill_pool(void)
+
+ /*
+ * On RT enabled kernels the pool refill must happen in preemptible
+- * context -- for !RT kernels we rely on the fact that spinlock_t and
+- * raw_spinlock_t are basically the same type and this lock-type
+- * inversion works just fine.
++ * context and not enqueued on an rt_mutex -- for !RT kernels we rely
++ * on the fact that spinlock_t and raw_spinlock_t are basically the
++ * same type and this lock-type inversion works just fine.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || system_state < SYSTEM_SCHEDULING ||
++ (preemptible() && !debug_objects_is_pi_blocked_on())) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+--
+2.53.0
+
--- /dev/null
+From 4f4378e74e7cfd681c329bf4c2a295d900e53a53 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:54:51 +0200
+Subject: debugobjects: Dont call fill_pool() in early boot hardirq context
+
+From: Waiman Long <longman@redhat.com>
+
+commit 0d046ae106255cba5eb83b23f78ee93f3620247d upstream.
+
+When booting a debug PREEMPT_RT kernel on an ARM64 system, an "inconsistent
+{HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage" lockdep warning message was
+reported to the console.
+
+During early boot, interrupts are enabled before the scheduler is
+enabled. In this window (before SYSTEM_SCHEDULING is set) interrupts can
+fire and the hard interrupt context handler can attempt to fill the pool.
+
+This can lead to a deadlock when the interrupt hits a region which holds
+a lock that is required to be taken in the allocation path.
+
+Add a new can_fill_pool() helper and reorder the exception rule and forbid
+this scenario by excluding allocations from hard interrupt context.
+
+Fixes: 06e0ae988f6e ("debugobjects: Allow to refill the pool before SYSTEM_SCHEDULING")
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260605173038.495075-1-longman@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 46 +++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 37 insertions(+), 9 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index cbd025dae5ce92..17f116247bb423 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -720,6 +720,41 @@ static inline bool debug_objects_is_pi_blocked_on(void)
+ #endif
+ }
+
++static inline bool can_fill_pool(void)
++{
++ /*
++ * On !RT enabled kernels there are no restrictions and spinlock_t and
++ * raw_spinlock_t are the same types.
++ */
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ return true;
++
++ /*
++ * On RT enabled kernels, the task must not be blocked on a lock as
++ * that could corrupt the PI state when blocking on a lock in the
++ * allocation path.
++ */
++ if (debug_objects_is_pi_blocked_on())
++ return false;
++
++ /*
++ * On RT enabled kernels the pool refill should happen in preemptible
++ * context.
++ */
++ if (preemptible())
++ return true;
++
++ /*
++ * Though during system boot before scheduling is set up, preemption is
++ * disabled and the pool can get exhausted. Before scheduling is active
++ * a task cannot be blocked on a sleeping lock, but it might hold a lock
++ * and if interrupted then hard interrupt context might run into a lock
++ * inversion. So exclude hard interrupt context from allocations before
++ * scheduling is active.
++ */
++ return system_state < SYSTEM_SCHEDULING && !in_hardirq();
++}
++
+ static void debug_objects_fill_pool(void)
+ {
+ if (!static_branch_likely(&obj_cache_enabled))
+@@ -734,18 +769,11 @@ static void debug_objects_fill_pool(void)
+ if (likely(!pool_should_refill(&pool_global)))
+ return;
+
+- /*
+- * On RT enabled kernels the pool refill must happen in preemptible
+- * context and not enqueued on an rt_mutex -- for !RT kernels we rely
+- * on the fact that spinlock_t and raw_spinlock_t are basically the
+- * same type and this lock-type inversion works just fine.
+- */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || system_state < SYSTEM_SCHEDULING ||
+- (preemptible() && !debug_objects_is_pi_blocked_on())) {
++ if (can_fill_pool()) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+- * the preemptible() condition above.
++ * the preemptible() condition in can_fill_pool().
+ */
+ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);
+ lock_map_acquire_try(&fill_pool_map);
+--
+2.53.0
+
--- /dev/null
+From 05a54bb56aeea1aab8b1d2c1e336d1df3253773b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:54:42 +0200
+Subject: debugobjects: Use LD_WAIT_CONFIG instead of LD_WAIT_SLEEP
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 37de2dbc318ee10577c1c2704de5a803e75e55a2 upstream.
+
+fill_pool_map is used to suppress nesting violations caused by acquiring
+a spinlock_t (from within the memory allocator) while holding a
+raw_spinlock_t. The used annotation is wrong.
+
+LD_WAIT_SLEEP is for always sleeping lock types such as mutex_t.
+LD_WAIT_CONFIG is for lock types which spin on !PREEMPT_RT but sleep on
+PREEMPT_RT, such as spinlock_t.
+
+Use LD_WAIT_CONFIG as override.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251127153652.291697-3-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 9d59b797d1b507..4343dc5e5c99da 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -734,10 +734,10 @@ static void debug_objects_fill_pool(void)
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+- * by temporarily raising the wait-type to WAIT_SLEEP, matching
++ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+ * the preemptible() condition above.
+ */
+- static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP);
++ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);
+ lock_map_acquire_try(&fill_pool_map);
+ fill_pool();
+ lock_map_release(&fill_pool_map);
+--
+2.53.0
+
net-stmmac-fix-stm32-and-potentially-others-resume-r.patch
fuse-re-lock-request-before-replacing-page-cache-folio.patch
revert-nfsd-defer-sub-object-cleanup-in-export-put-callbacks.patch
+debugobjects-allow-to-refill-the-pool-before-system_.patch
+debugobjects-use-ld_wait_config-instead-of-ld_wait_s.patch
+debugobjects-do-not-fill_pool-if-pi_blocked_on.patch
+debugobjects-dont-call-fill_pool-in-early-boot-hardi.patch
--- /dev/null
+From 5b506c182e8aba4ea5fcce8d9cb4cf5d410885f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 12:26:32 +0200
+Subject: ARM: allow __do_kernel_fault() to report execution of memory faults
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+commit 40b466db1dffb41f0529035c59c5739636d0e5b8 upstream.
+
+Allow __do_kernel_fault() to detect the execution of memory, so we can
+provide the same fault message as do_page_fault() would do. This is
+required when we split the kernel address fault handling from the
+main do_page_fault() code path.
+
+Reviewed-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Tested-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mm/fault.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index 879730a47c4a20..4c0ee81befb1ed 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -176,6 +176,8 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+ */
+ if (addr < PAGE_SIZE) {
+ msg = "NULL pointer dereference";
++ } else if (is_permission_fault(fsr) && fsr & FSR_LNX_PF) {
++ msg = "execution of memory";
+ } else {
+ if (is_translation_fault(fsr) &&
+ kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
+--
+2.53.0
+
--- /dev/null
+From 7b1fd36e7b330a5a35d5877e2fc6f30f8f43adcf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 12:26:34 +0200
+Subject: ARM: fix branch predictor hardening
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+commit fd2dee1c6e2256f726ba33fd3083a7be0efc80d3 upstream.
+
+__do_user_fault() may be called with indeterminate interrupt enable
+state, which means we may be preemptible at this point. This causes
+problems when calling harden_branch_predictor(). For example, when
+called from a data abort, do_alignment_fault()->do_bad_area().
+
+Move harden_branch_predictor() out of __do_user_fault() and into the
+calling contexts.
+
+Moving it into do_kernel_address_page_fault(), we can be sure that
+interrupts will be disabled here.
+
+Converting do_translation_fault() to use do_kernel_address_page_fault()
+rather than do_bad_area() means that we keep branch predictor handling
+for translation faults. Interrupts will also be disabled at this call
+site.
+
+do_sect_fault() needs special handling, so detect user mode accesses
+to kernel-addresses, and add an explicit call to branch predictor
+hardening.
+
+Finally, add branch predictor hardening to do_alignment() for the
+faulting case (user mode accessing kernel addresses) before interrupts
+are enabled.
+
+This should cover all cases where harden_branch_predictor() is called,
+ensuring that it always has interrupts disabled, also ensuring that
+it is called early in each call path.
+
+Reviewed-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Tested-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mm/alignment.c | 4 ++++
+ arch/arm/mm/fault.c | 39 ++++++++++++++++++++++++++-------------
+ 2 files changed, 30 insertions(+), 13 deletions(-)
+
+diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
+index f8dd0b3cc8e040..ee264737be6d26 100644
+--- a/arch/arm/mm/alignment.c
++++ b/arch/arm/mm/alignment.c
+@@ -22,6 +22,7 @@
+
+ #include <asm/cp15.h>
+ #include <asm/system_info.h>
++#include <asm/system_misc.h>
+ #include <asm/unaligned.h>
+ #include <asm/opcodes.h>
+
+@@ -809,6 +810,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ int thumb2_32b = 0;
+ int fault;
+
++ if (addr >= TASK_SIZE && user_mode(regs))
++ harden_branch_predictor();
++
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index 47eecdf29a8312..87ed5da30e44f1 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -199,9 +199,6 @@ __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
+ {
+ struct task_struct *tsk = current;
+
+- if (addr > TASK_SIZE)
+- harden_branch_predictor();
+-
+ #ifdef CONFIG_DEBUG_USER
+ if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
+ ((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
+@@ -252,8 +249,10 @@ do_kernel_address_page_fault(struct mm_struct *mm, unsigned long addr,
+ /*
+ * Fault from user mode for a kernel space address. User mode
+ * should not be faulting in kernel space, which includes the
+- * vector/khelper page. Send a SIGSEGV.
++ * vector/khelper page. Handle the branch predictor hardening
++ * while interrupts are still disabled, then send a SIGSEGV.
+ */
++ harden_branch_predictor();
+ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
+ } else {
+ /*
+@@ -423,16 +422,20 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ * We enter here because the first level page table doesn't contain
+ * a valid entry for the address.
+ *
+- * If the address is in kernel space (>= TASK_SIZE), then we are
+- * probably faulting in the vmalloc() area.
++ * If this is a user address (addr < TASK_SIZE), we handle this as a
++ * normal page fault. This leaves the remainder of the function to handle
++ * kernel address translation faults.
+ *
+- * If the init_task's first level page tables contains the relevant
+- * entry, we copy the it to this task. If not, we send the process
+- * a signal, fixup the exception, or oops the kernel.
++ * Since user mode is not permitted to access kernel addresses, pass these
++ * directly to do_kernel_address_page_fault() to handle.
+ *
+- * NOTE! We MUST NOT take any locks for this case. We may be in an
+- * interrupt or a critical region, and should only copy the information
+- * from the master page table, nothing more.
++ * Otherwise, we're probably faulting in the vmalloc() area, so try to fix
++ * that up. Note that we must not take any locks or enable interrupts in
++ * this case.
++ *
++ * If vmalloc() fixup fails, that means the non-leaf page tables did not
++ * contain an entry for this address, so handle this via
++ * do_kernel_address_page_fault().
+ */
+ #ifdef CONFIG_MMU
+ static int __kprobes
+@@ -498,7 +501,8 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+ return 0;
+
+ bad_area:
+- do_bad_area(addr, fsr, regs);
++ do_kernel_address_page_fault(current->mm, addr, fsr, regs);
++
+ return 0;
+ }
+ #else /* CONFIG_MMU */
+@@ -518,7 +522,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+ static int
+ do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ {
++ /*
++ * If this is a kernel address, but from user mode, then userspace
++ * is trying bad stuff. Invoke the branch predictor handling.
++ * Interrupts are disabled here.
++ */
++ if (addr >= TASK_SIZE && user_mode(regs))
++ harden_branch_predictor();
++
+ do_bad_area(addr, fsr, regs);
++
+ return 0;
+ }
+ #endif /* CONFIG_ARM_LPAE */
+--
+2.53.0
+
--- /dev/null
+From 83b9f7020ba31768a5eed6c46aef3623442184e1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 12:26:33 +0200
+Subject: ARM: fix hash_name() fault
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+commit 7733bc7d299d682f2723dc38fc7f370b9bf973e9 upstream.
+
+Zizhi Wo reports:
+
+"During the execution of hash_name()->load_unaligned_zeropad(), a
+ potential memory access beyond the PAGE boundary may occur. For
+ example, when the filename length is near the PAGE_SIZE boundary.
+ This triggers a page fault, which leads to a call to
+ do_page_fault()->mmap_read_trylock(). If we can't acquire the lock,
+ we have to fall back to the mmap_read_lock() path, which calls
+ might_sleep(). This breaks RCU semantics because path lookup occurs
+ under an RCU read-side critical section."
+
+This is seen with CONFIG_DEBUG_ATOMIC_SLEEP=y and CONFIG_KFENCE=y.
+
+Kernel addresses (with the exception of the vectors/kuser helper
+page) do not have VMAs associated with them. If the vectors/kuser
+helper page faults, then there are two possibilities:
+
+1. if the fault happened while in kernel mode, then we're basically
+ dead, because the CPU won't be able to vector through this page
+ to handle the fault.
+2. if the fault happened while in user mode, that means the page was
+ protected from user access, and we want to fault anyway.
+
+Thus, we can handle kernel addresses from any context entirely
+separately without going anywhere near the mmap lock. This gives us
+an entirely non-sleeping path for all kernel mode kernel address
+faults.
+
+As we handle the kernel address faults before interrupts are enabled,
+this change has the side effect of improving the branch predictor
+hardening, but does not completely solve the issue.
+
+Reported-by: Zizhi Wo <wozizhi@huaweicloud.com>
+Reported-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Link: https://lore.kernel.org/r/20251126090505.3057219-1-wozizhi@huaweicloud.com
+Reviewed-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Tested-by: Xie Yuanbin <xieyuanbin1@huawei.com>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mm/fault.c | 35 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index 4c0ee81befb1ed..47eecdf29a8312 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -244,6 +244,35 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ #define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
+ #define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
+
++static int __kprobes
++do_kernel_address_page_fault(struct mm_struct *mm, unsigned long addr,
++ unsigned int fsr, struct pt_regs *regs)
++{
++ if (user_mode(regs)) {
++ /*
++ * Fault from user mode for a kernel space address. User mode
++ * should not be faulting in kernel space, which includes the
++ * vector/khelper page. Send a SIGSEGV.
++ */
++ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
++ } else {
++ /*
++ * Fault from kernel mode. Enable interrupts if they were
++ * enabled in the parent context. Section (upper page table)
++ * translation faults are handled via do_translation_fault(),
++ * so we will only get here for a non-present kernel space
++ * PTE or PTE permission fault. This may happen in exceptional
++ * circumstances and need the fixup tables to be walked.
++ */
++ if (interrupts_enabled(regs))
++ local_irq_enable();
++
++ __do_kernel_fault(mm, addr, fsr, regs);
++ }
++
++ return 0;
++}
++
+ static int __kprobes
+ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ {
+@@ -257,6 +286,12 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ if (kprobe_page_fault(regs, fsr))
+ return 0;
+
++ /*
++ * Handle kernel addresses faults separately, which avoids touching
++ * the mmap lock from contexts that are not able to sleep.
++ */
++ if (addr >= TASK_SIZE)
++ return do_kernel_address_page_fault(mm, addr, fsr, regs);
+
+ /* Enable interrupts if they were enabled in the parent context. */
+ if (interrupts_enabled(regs))
+--
+2.53.0
+
--- /dev/null
+From 5524d39d849a013ac180a852c8bd9eb6d32ee67b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 12:26:31 +0200
+Subject: ARM: group is_permission_fault() with is_translation_fault()
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+commit dea20281ac88226615761c570c8ff7adc18e6ac2 upstream.
+
+Group is_permission_fault() with is_translation_fault(), which is
+needed to use is_permission_fault() in __do_kernel_fault(). As
+this is static inline, there is no need for this to be under
+CONFIG_MMU.
+
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mm/fault.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index ed1a25f457e48e..879730a47c4a20 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -128,6 +128,19 @@ static inline bool is_translation_fault(unsigned int fsr)
+ return false;
+ }
+
++static inline bool is_permission_fault(unsigned int fsr)
++{
++ int fs = fsr_fs(fsr);
++#ifdef CONFIG_ARM_LPAE
++ if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL)
++ return true;
++#else
++ if (fs == FS_L1_PERM || fs == FS_L2_PERM)
++ return true;
++#endif
++ return false;
++}
++
+ static void die_kernel_fault(const char *msg, struct mm_struct *mm,
+ unsigned long addr, unsigned int fsr,
+ struct pt_regs *regs)
+@@ -229,19 +242,6 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ #define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
+ #define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
+
+-static inline bool is_permission_fault(unsigned int fsr)
+-{
+- int fs = fsr_fs(fsr);
+-#ifdef CONFIG_ARM_LPAE
+- if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL)
+- return true;
+-#else
+- if (fs == FS_L1_PERM || fs == FS_L2_PERM)
+- return true;
+-#endif
+- return false;
+-}
+-
+ static int __kprobes
+ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ {
+--
+2.53.0
+
--- /dev/null
+From 5fcaa55af1d122a45987dd987a0aae167010cedd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:12 +0200
+Subject: debugobjects: Allow to refill the pool before SYSTEM_SCHEDULING
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 06e0ae988f6e3499785c407429953ade19c1096b upstream.
+
+The pool of free objects is refilled on several occasions such as object
+initialisation. On PREEMPT_RT refilling is limited to preemptible
+sections due to sleeping locks used by the memory allocator. The system
+boots with disabled interrupts so the pool can not be refilled.
+
+If too many objects are initialized and the pool gets empty then
+debugobjects disables itself.
+
+Refilling can also happen early in the boot with disabled interrupts as
+long as the scheduler is not operational. If the scheduler can not
+preempt a task then a sleeping lock can not be contended.
+
+Allow to additionally refill the pool if the scheduler is not
+operational.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251127153652.291697-2-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 35cd384f7e8a2f..5b462a45a9c17b 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -605,7 +605,7 @@ static void debug_objects_fill_pool(void)
+ * raw_spinlock_t are basically the same type and this lock-type
+ * inversion works just fine.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to WAIT_SLEEP, matching
+--
+2.53.0
+
--- /dev/null
+From 126dc6c2c747f2864e665ce22c7734af591db48c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:21 +0200
+Subject: debugobjects: Do not fill_pool() if pi_blocked_on
+
+From: Helen Koike <koike@igalia.com>
+
+commit 5f41161059fd0f1bbf18c90f3180e38cc45a14eb upstream.
+
+On RT enabled kernels, fill_pool() ends up calling rtlock_lock(), which
+asserts if current::pi_blocked_on is set, because a task can obviously only
+block on one lock as otherwise the priority inheritance chain gets
+corrupted.
+
+Prevent this by expanding the conditional to take current::pi_blocked_on
+into account.
+
+Fixes: 4bedcc28469a ("debugobjects: Make them PREEMPT_RT aware")
+Reported-by: syzbot+b8ca586b9fc235f0c0df@syzkaller.appspotmail.com
+Signed-off-by: Helen Koike <koike@igalia.com>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Link: https://patch.msgid.link/20260511215359.3351259-1-koike@igalia.com
+Closes: https://syzkaller.appspot.com/bug?extid=b8ca586b9fc235f0c0df
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index bebc00aacafedd..b1c3e873a71d40 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -597,15 +597,25 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket
+ return NULL;
+ }
+
++static inline bool debug_objects_is_pi_blocked_on(void)
++{
++#ifdef CONFIG_RT_MUTEXES
++ return current->pi_blocked_on != NULL;
++#else
++ return false;
++#endif
++}
++
+ static void debug_objects_fill_pool(void)
+ {
+ /*
+ * On RT enabled kernels the pool refill must happen in preemptible
+- * context -- for !RT kernels we rely on the fact that spinlock_t and
+- * raw_spinlock_t are basically the same type and this lock-type
+- * inversion works just fine.
++ * context and not enqueued on an rt_mutex -- for !RT kernels we rely
++ * on the fact that spinlock_t and raw_spinlock_t are basically the
++ * same type and this lock-type inversion works just fine.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || system_state < SYSTEM_SCHEDULING ||
++ (preemptible() && !debug_objects_is_pi_blocked_on())) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+--
+2.53.0
+
--- /dev/null
+From 1f0ca563af654ec409735e370fc93240bc83f7d7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:26 +0200
+Subject: debugobjects: Dont call fill_pool() in early boot hardirq context
+
+From: Waiman Long <longman@redhat.com>
+
+commit 0d046ae106255cba5eb83b23f78ee93f3620247d upstream.
+
+When booting a debug PREEMPT_RT kernel on an ARM64 system, a "inconsistent
+{HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage" lockdep warning message was
+reported to the console.
+
+During early boot, interrupts are enabled before the scheduler is
+enabled. In this window (before SYSTEM_SCHEDULING is set) interrupts can
+fire and the hard interrupt context handler can attempt to fill the pool.
+
+This can lead to a deadlock when the interrupt hits a region which holds
+a lock that is required to be taken in the allocation path.
+
+Add a new can_fill_pool() helper and reorder the exception rule and forbid
+this scenario by excluding allocations from hard interrupt context.
+
+Fixes: 06e0ae988f6e ("debugobjects: Allow to refill the pool before SYSTEM_SCHEDULING")
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@kernel.org>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260605173038.495075-1-longman@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 44 ++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 36 insertions(+), 8 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index b1c3e873a71d40..e4ef9d032d1749 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -606,20 +606,48 @@ static inline bool debug_objects_is_pi_blocked_on(void)
+ #endif
+ }
+
+-static void debug_objects_fill_pool(void)
++static inline bool can_fill_pool(void)
+ {
+ /*
+- * On RT enabled kernels the pool refill must happen in preemptible
+- * context and not enqueued on an rt_mutex -- for !RT kernels we rely
+- * on the fact that spinlock_t and raw_spinlock_t are basically the
+- * same type and this lock-type inversion works just fine.
++ * On !RT enabled kernels there are no restrictions and spinlock_t and
++ * raw_spinlock_t are the same types.
++ */
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ return true;
++
++ /*
++ * On RT enabled kernels, the task must not be blocked on a lock as
++ * that could corrupt the PI state when blocking on a lock in the
++ * allocation path.
++ */
++ if (debug_objects_is_pi_blocked_on())
++ return false;
++
++ /*
++ * On RT enabled kernels the pool refill should happen in preemptible
++ * context.
+ */
+- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || system_state < SYSTEM_SCHEDULING ||
+- (preemptible() && !debug_objects_is_pi_blocked_on())) {
++ if (preemptible())
++ return true;
++
++ /*
++ * Though during system boot before scheduling is set up, preemption is
++ * disabled and the pool can get exhausted. Before scheduling is active
++ * a task cannot be blocked on a sleeping lock, but it might hold a lock
++ * and if interrupted then hard interrupt context might run into a lock
++ * inversion. So exclude hard interrupt context from allocations before
++ * scheduling is active.
++ */
++ return system_state < SYSTEM_SCHEDULING && !in_hardirq();
++}
++
++static void debug_objects_fill_pool(void)
++{
++ if (can_fill_pool()) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+- * the preemptible() condition above.
++ * the preemptible() condition in can_fill_pool().
+ */
+ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);
+ lock_map_acquire_try(&fill_pool_map);
+--
+2.53.0
+
--- /dev/null
+From ff13abd57c38583c7704613f7f3090a2c74716ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 11:55:17 +0200
+Subject: debugobjects: Use LD_WAIT_CONFIG instead of LD_WAIT_SLEEP
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+commit 37de2dbc318ee10577c1c2704de5a803e75e55a2 upstream.
+
+fill_pool_map is used to suppress nesting violations caused by acquiring
+a spinlock_t (from within the memory allocator) while holding a
+raw_spinlock_t. The used annotation is wrong.
+
+LD_WAIT_SLEEP is for always sleeping lock types such as mutex_t.
+LD_WAIT_CONFIG is for lock types which spin on !PREEMPT_RT but sleep on
+PREEMPT_RT, such as spinlock_t.
+
+Use LD_WAIT_CONFIG as override.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://patch.msgid.link/20251127153652.291697-3-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/debugobjects.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/lib/debugobjects.c b/lib/debugobjects.c
+index 5b462a45a9c17b..bebc00aacafedd 100644
+--- a/lib/debugobjects.c
++++ b/lib/debugobjects.c
+@@ -608,10 +608,10 @@ static void debug_objects_fill_pool(void)
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+- * by temporarily raising the wait-type to WAIT_SLEEP, matching
++ * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
+ * the preemptible() condition above.
+ */
+- static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP);
++ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);
+ lock_map_acquire_try(&fill_pool_map);
+ fill_pool();
+ lock_map_release(&fill_pool_map);
+--
+2.53.0
+
--- /dev/null
+From 9566e0e5e46151432d51d44b6699747369eb9faf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jun 2026 10:03:43 +0000
+Subject: KVM: VMX: Update SVI during runtime APICv activation
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+[ Upstream commit b2849bec936be642b5420801f902337f2507648e ]
+
+The APICv (apic->apicv_active) can be activated or deactivated at runtime,
+for instance, because of APICv inhibit reasons. Intel VMX employs different
+mechanisms to virtualize LAPIC based on whether APICv is active.
+
+When APICv is activated at runtime, GUEST_INTR_STATUS is used to configure
+and report the current pending IRR and ISR states. Unless a specific vector
+is explicitly included in EOI_EXIT_BITMAP, its EOI will not be trapped to
+KVM. Intel VMX automatically clears the corresponding ISR bit based on the
+GUEST_INTR_STATUS.SVI field.
+
+When APICv is deactivated at runtime, the VM_ENTRY_INTR_INFO_FIELD is used
+to specify the next interrupt vector to invoke upon VM-entry. The
+VMX IDT_VECTORING_INFO_FIELD is used to report un-invoked vectors on
+VM-exit. EOIs are always trapped to KVM, so the software can manually clear
+pending ISR bits.
+
+There are scenarios where, with APICv activated at runtime, a guest-issued
+EOI may not be able to clear the pending ISR bit.
+
+Taking vector 236 as an example, here is one scenario.
+
+1. Suppose APICv is inactive. Vector 236 is pending in the IRR.
+2. To handle KVM_REQ_EVENT, KVM moves vector 236 from the IRR to the ISR,
+and configures the VM_ENTRY_INTR_INFO_FIELD via vmx_inject_irq().
+3. After VM-entry, vector 236 is invoked through the guest IDT. At this
+point, the data in VM_ENTRY_INTR_INFO_FIELD is no longer valid. The guest
+interrupt handler for vector 236 is invoked.
+4. Suppose a VM exit occurs very early in the guest interrupt handler,
+before the EOI is issued.
+5. Nothing is reported through the IDT_VECTORING_INFO_FIELD because
+vector 236 has already been invoked in the guest.
+6. Now, suppose APICv is activated. Before the next VM-entry, KVM calls
+kvm_vcpu_update_apicv() to activate APICv.
+7. Unfortunately, GUEST_INTR_STATUS.SVI is not configured, although
+vector 236 is still pending in the ISR.
+8. After VM-entry, the guest finally issues the EOI for vector 236.
+However, because SVI is not configured, vector 236 is not cleared.
+9. ISR is stalled forever on vector 236.
+
+Here is another scenario.
+
+1. Suppose APICv is inactive. Vector 236 is pending in the IRR.
+2. To handle KVM_REQ_EVENT, KVM moves vector 236 from the IRR to the ISR,
+and configures the VM_ENTRY_INTR_INFO_FIELD via vmx_inject_irq().
+3. VM-exit occurs immediately after the next VM-entry. The vector 236 is
+not invoked through the guest IDT. Instead, it is saved to the
+IDT_VECTORING_INFO_FIELD during the VM-exit.
+4. KVM calls kvm_queue_interrupt() to re-queue the un-invoked vector 236
+into vcpu->arch.interrupt. A KVM_REQ_EVENT is requested.
+5. Now, suppose APICv is activated. Before the next VM-entry, KVM calls
+kvm_vcpu_update_apicv() to activate APICv.
+6. Although APICv is now active, KVM still uses the legacy
+VM_ENTRY_INTR_INFO_FIELD to re-inject vector 236. GUEST_INTR_STATUS.SVI is
+not configured.
+7. After the next VM-entry, vector 236 is invoked through the guest IDT.
+Finally, an EOI occurs. However, due to the lack of GUEST_INTR_STATUS.SVI
+configuration, vector 236 is not cleared from the ISR.
+8. ISR is stalled forever on vector 236.
+
+Using QEMU as an example, vector 236 is stuck in ISR forever.
+
+(qemu) info lapic 1
+dumping local APIC state for CPU 1
+
+LVT0 0x00010700 active-hi edge masked ExtINT (vec 0)
+LVT1 0x00010400 active-hi edge masked NMI
+LVTPC 0x00000400 active-hi edge NMI
+LVTERR 0x000000fe active-hi edge Fixed (vec 254)
+LVTTHMR 0x00010000 active-hi edge masked Fixed (vec 0)
+LVTT 0x000400ec active-hi edge tsc-deadline Fixed (vec 236)
+Timer DCR=0x0 (divide by 2) initial_count = 0 current_count = 0
+SPIV 0x000001ff APIC enabled, focus=off, spurious vec 255
+ICR 0x000000fd physical edge de-assert no-shorthand
+ICR2 0x00000000 cpu 0 (X2APIC ID)
+ESR 0x00000000
+ISR 236
+IRR 37(level) 236
+
+The issue isn't applicable to AMD SVM as KVM simply writes vmcb01 directly
+irrespective of whether L1 (vmcs01) or L2 (vmcb02) is active (unlike VMX,
+there is no need/cost to switch between VMCBs). In addition,
+APICV_INHIBIT_REASON_IRQWIN ensures AMD SVM AVIC is not activated until
+the last interrupt is EOI'd.
+
+Fix the bug by configuring Intel VMX GUEST_INTR_STATUS.SVI if APICv is
+activated at runtime.
+
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Reviewed-by: Chao Gao <chao.gao@intel.com>
+Link: https://patch.msgid.link/20251110063212.34902-1-dongli.zhang@oracle.com
+[sean: call out that SVM writes vmcb01 directly, tweak comment]
+Link: https://patch.msgid.link/20251205231913.441872-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+[gulshan: resolved a minor conflict in vmx.c arising from a comment]
+Signed-off-by: Gulshan Gabel <gulshan.gabel@nutanix.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/vmx.c | 4 ----
+ arch/x86/kvm/x86.c | 7 +++++++
+ 2 files changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 4a45e86c5e2fcc..85d301a03b2004 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6851,10 +6851,6 @@ static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
+ * VM-Exit, otherwise L1 with run with a stale SVI.
+ */
+ if (is_guest_mode(vcpu)) {
+- /*
+- * KVM is supposed to forward intercepted L2 EOIs to L1 if VID
+- * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
+- */
+ to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
+ return;
+ }
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 3838b7336590dd..c04277b35e2edf 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -10426,9 +10426,16 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
+ * still active when the interrupt got accepted. Make sure
+ * kvm_check_and_inject_events() is called to check for that.
++ *
++ * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
++ * highest bit in vISR and the next accelerated EOI in the guest won't
++ * be virtualized correctly (the CPU uses SVI to determine which vISR
++ * vector to clear).
+ */
+ if (!apic->apicv_active)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
++ else
++ kvm_apic_update_hwapic_isr(vcpu);
+
+ out:
+ preempt_enable();
+--
+2.53.0
+
drm-v3d-store-the-active-job-inside-the-queue-s-stat.patch
drm-v3d-skip-csd-when-it-has-zeroed-workgroups.patch
batman-adv-tt-prevent-tvlv-entry-number-overflow.patch
+debugobjects-allow-to-refill-the-pool-before-system_.patch
+debugobjects-use-ld_wait_config-instead-of-ld_wait_s.patch
+debugobjects-do-not-fill_pool-if-pi_blocked_on.patch
+debugobjects-dont-call-fill_pool-in-early-boot-hardi.patch
+arm-group-is_permission_fault-with-is_translation_fa.patch
+arm-allow-__do_kernel_fault-to-report-execution-of-m.patch
+arm-fix-hash_name-fault.patch
+arm-fix-branch-predictor-hardening.patch
+kvm-vmx-update-svi-during-runtime-apicv-activation.patch