From: Greg Kroah-Hartman Date: Tue, 12 May 2026 13:29:46 +0000 (+0200) Subject: 7.0-stable patches X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c193c8d22cd0fd83f39cffef38f05a8cf2af3704;p=thirdparty%2Fkernel%2Fstable-queue.git 7.0-stable patches added patches: accel-ivpu-disallow-re-exporting-imported-gem-objects.patch arm64-signal-preserve-por_el0-if-poe_context-is-missing.patch fanotify-fix-false-positive-on-permission-events.patch kvm-arm64-fix-kvm_vcpu_initialized-macro-parameter.patch loongarch-fix-sym_sigfunc_start-definition-for-32bit.patch loongarch-kvm-compile-switch.s-directly-into-the-kernel.patch mm-hugetlb_cma-round-up-per_node-before-logging-it.patch mptcp-pm-add_addr-rtx-skip-inactive-subflows.patch mtd-spi-nor-debugfs-fix-out-of-bounds-read-in-spi_nor_params_show.patch net-rtnetlink-zero-ifla_vf_broadcast-to-avoid-stack-infoleak-in-rtnl_fill_vfinfo.patch perf-x86-intel-improve-validation-and-configuration-of-acr-masks.patch rseq-don-t-advertise-time-slice-extensions-if-disabled.patch rseq-protect-rseq_reset-against-interrupts.patch rseq-set-rseq-cpu_id_start-to-0-on-unregistration.patch selftests-rseq-don-t-run-tests-with-runner-scripts-outside-of-the-scripts.patch selftests-rseq-expand-for-optimized-rseq-abi-v2.patch selftests-rseq-make-registration-flexible-for-legacy-and-optimized-mode.patch selftests-rseq-skip-tests-if-time-slice-extensions-are-not-available.patch selftests-rseq-validate-legacy-behavior.patch --- diff --git a/queue-7.0/accel-ivpu-disallow-re-exporting-imported-gem-objects.patch b/queue-7.0/accel-ivpu-disallow-re-exporting-imported-gem-objects.patch new file mode 100644 index 0000000000..e480e03c1b --- /dev/null +++ b/queue-7.0/accel-ivpu-disallow-re-exporting-imported-gem-objects.patch @@ -0,0 +1,63 @@ +From 7dd57d7a6350770dfc283287125c409e995200e0 Mon Sep 17 00:00:00 2001 +From: Karol Wachowski +Date: Thu, 30 Apr 2026 11:56:44 +0200 +Subject: accel/ivpu: Disallow re-exporting imported GEM objects + +From: Karol 
Wachowski + +commit 7dd57d7a6350770dfc283287125c409e995200e0 upstream. + +Prevent re-exporting of imported GEM buffers by adding a custom +prime_handle_to_fd callback that checks if the object is imported +and returns -EOPNOTSUPP if so. + +Re-exporting imported GEM buffers causes loss of buffer flags settings, +leading to incorrect device access and data corruption. + +Reported-by: Yametsu +Fixes: 57557964b582 ("accel/ivpu: Add support for userptr buffer objects") +Reviewed-by: Andrzej Kacprowski +Signed-off-by: Karol Wachowski +Cc: # v6.19+ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/accel/ivpu/ivpu_drv.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +--- a/drivers/accel/ivpu/ivpu_drv.c ++++ b/drivers/accel/ivpu/ivpu_drv.c +@@ -460,6 +460,26 @@ static const struct file_operations ivpu + #endif + }; + ++static int ivpu_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, ++ u32 handle, u32 flags, int *prime_fd) ++{ ++ struct drm_gem_object *obj; ++ ++ obj = drm_gem_object_lookup(file_priv, handle); ++ if (!obj) ++ return -ENOENT; ++ ++ if (drm_gem_is_imported(obj)) { ++ /* Do not allow re-exporting */ ++ drm_gem_object_put(obj); ++ return -EOPNOTSUPP; ++ } ++ ++ drm_gem_object_put(obj); ++ ++ return drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); ++} ++ + static const struct drm_driver driver = { + .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, + +@@ -468,6 +488,7 @@ static const struct drm_driver driver = + + .gem_create_object = ivpu_gem_create_object, + .gem_prime_import = ivpu_gem_prime_import, ++ .prime_handle_to_fd = ivpu_gem_prime_handle_to_fd, + + .ioctls = ivpu_drm_ioctls, + .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), diff --git a/queue-7.0/arm64-signal-preserve-por_el0-if-poe_context-is-missing.patch b/queue-7.0/arm64-signal-preserve-por_el0-if-poe_context-is-missing.patch new file mode 100644 index 0000000000..03e1cedbea --- /dev/null +++ 
b/queue-7.0/arm64-signal-preserve-por_el0-if-poe_context-is-missing.patch @@ -0,0 +1,158 @@ +From 030e8a40fff65ca6ac1c04a4d3c08afe72438922 Mon Sep 17 00:00:00 2001 +From: Kevin Brodsky +Date: Mon, 27 Apr 2026 13:03:33 +0100 +Subject: arm64: signal: Preserve POR_EL0 if poe_context is missing + +From: Kevin Brodsky + +commit 030e8a40fff65ca6ac1c04a4d3c08afe72438922 upstream. + +Commit 2e8a1acea859 ("arm64: signal: Improve POR_EL0 handling to +avoid uaccess failures") delayed the write to POR_EL0 in +rt_sigreturn to avoid spurious uaccess failures. This change however +relies on the poe_context frame record being present: on a system +supporting POE, calling sigreturn without a poe_context record now +results in writing arbitrary data from the kernel stack into POR_EL0. + +Fix this by adding a __valid_fields member to struct +user_access_state, and zeroing the struct on allocation. +restore_poe_context() then indicates that the por_el0 field is valid +by setting the corresponding bit in __valid_fields, and +restore_user_access_state() only touches POR_EL0 if there is a valid +value to set it to. This is in line with how POR_EL0 was originally +handled; all frame records are currently optional, except +fpsimd_context. + +To ensure that __valid_fields is kept in sync, fields (currently +just por_el0) are now accessed via accessors and prefixed with __ to +discourage direct access. 
+ +Fixes: 2e8a1acea859 ("arm64: signal: Improve POR_EL0 handling to avoid uaccess failures") +Cc: +Reported-by: Will Deacon +Signed-off-by: Kevin Brodsky +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/signal.c | 54 +++++++++++++++++++++++++++++++++++---------- + 1 file changed, 43 insertions(+), 11 deletions(-) + +--- a/arch/arm64/kernel/signal.c ++++ b/arch/arm64/kernel/signal.c +@@ -67,6 +67,9 @@ struct rt_sigframe_user_layout { + unsigned long end_offset; + }; + ++#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) ++#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) ++ + /* + * Holds any EL0-controlled state that influences unprivileged memory accesses. + * This includes both accesses done in userspace and uaccess done in the kernel. +@@ -74,13 +77,35 @@ struct rt_sigframe_user_layout { + * This state needs to be carefully managed to ensure that it doesn't cause + * uaccess to fail when setting up the signal frame, and the signal handler + * itself also expects a well-defined state when entered. ++ * ++ * The struct should be zero-initialised. Its members should only be accessed ++ * via the accessors below. __valid_fields tracks which of the fields are valid ++ * (have been set to some value). 
+ */ + struct user_access_state { +- u64 por_el0; ++ unsigned int __valid_fields; ++ u64 __por_el0; + }; + +-#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) +-#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) ++#define UA_STATE_HAS_POR_EL0 BIT(0) ++ ++static void set_ua_state_por_el0(struct user_access_state *ua_state, ++ u64 por_el0) ++{ ++ ua_state->__por_el0 = por_el0; ++ ua_state->__valid_fields |= UA_STATE_HAS_POR_EL0; ++} ++ ++static int get_ua_state_por_el0(const struct user_access_state *ua_state, ++ u64 *por_el0) ++{ ++ if (ua_state->__valid_fields & UA_STATE_HAS_POR_EL0) { ++ *por_el0 = ua_state->__por_el0; ++ return 0; ++ } ++ ++ return -ENOENT; ++} + + /* + * Save the user access state into ua_state and reset it to disable any +@@ -94,7 +119,7 @@ static void save_reset_user_access_state + for (int pkey = 0; pkey < arch_max_pkey(); pkey++) + por_enable_all |= POR_ELx_PERM_PREP(pkey, POE_RWX); + +- ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0); ++ set_ua_state_por_el0(ua_state, read_sysreg_s(SYS_POR_EL0)); + write_sysreg_s(por_enable_all, SYS_POR_EL0); + /* + * No ISB required as we can tolerate spurious Overlay faults - +@@ -122,8 +147,10 @@ static void set_handler_user_access_stat + */ + static void restore_user_access_state(const struct user_access_state *ua_state) + { +- if (system_supports_poe()) +- write_sysreg_s(ua_state->por_el0, SYS_POR_EL0); ++ u64 por_el0; ++ ++ if (get_ua_state_por_el0(ua_state, &por_el0) == 0) ++ write_sysreg_s(por_el0, SYS_POR_EL0); + } + + static void init_user_layout(struct rt_sigframe_user_layout *user) +@@ -333,11 +360,16 @@ static int restore_fpmr_context(struct u + static int preserve_poe_context(struct poe_context __user *ctx, + const struct user_access_state *ua_state) + { +- int err = 0; ++ int err; ++ u64 por_el0; ++ ++ err = get_ua_state_por_el0(ua_state, &por_el0); ++ if (WARN_ON_ONCE(err)) ++ return err; + + __put_user_error(POE_MAGIC, &ctx->head.magic, err); + 
__put_user_error(sizeof(*ctx), &ctx->head.size, err); +- __put_user_error(ua_state->por_el0, &ctx->por_el0, err); ++ __put_user_error(por_el0, &ctx->por_el0, err); + + return err; + } +@@ -353,7 +385,7 @@ static int restore_poe_context(struct us + + __get_user_error(por_el0, &(user->poe->por_el0), err); + if (!err) +- ua_state->por_el0 = por_el0; ++ set_ua_state_por_el0(ua_state, por_el0); + + return err; + } +@@ -1095,7 +1127,7 @@ SYSCALL_DEFINE0(rt_sigreturn) + { + struct pt_regs *regs = current_pt_regs(); + struct rt_sigframe __user *frame; +- struct user_access_state ua_state; ++ struct user_access_state ua_state = {}; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; +@@ -1507,7 +1539,7 @@ static int setup_rt_frame(int usig, stru + { + struct rt_sigframe_user_layout user; + struct rt_sigframe __user *frame; +- struct user_access_state ua_state; ++ struct user_access_state ua_state = {}; + int err = 0; + + fpsimd_save_and_flush_current_state(); diff --git a/queue-7.0/fanotify-fix-false-positive-on-permission-events.patch b/queue-7.0/fanotify-fix-false-positive-on-permission-events.patch new file mode 100644 index 0000000000..80c5835e19 --- /dev/null +++ b/queue-7.0/fanotify-fix-false-positive-on-permission-events.patch @@ -0,0 +1,86 @@ +From 7746e3bd4cc19b5092e00d32d676e329bfcb6900 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 10 Apr 2026 16:49:47 +0200 +Subject: fanotify: fix false positive on permission events + +From: Miklos Szeredi + +commit 7746e3bd4cc19b5092e00d32d676e329bfcb6900 upstream. + +fsnotify_get_mark_safe() may return false for a mark on an unrelated group, +which results in bypassing the permission check. + +Fix by skipping over detached marks that are not in the current group. 
+ +CC: stable@vger.kernel.org +Fixes: abc77577a669 ("fsnotify: Provide framework for dropping SRCU lock in ->handle_event") +Signed-off-by: Miklos Szeredi +Link: https://patch.msgid.link/20260410144950.156160-1-mszeredi@redhat.com +Signed-off-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman +--- + fs/notify/fsnotify.c | 2 +- + fs/notify/mark.c | 18 +++++++++++------- + include/linux/fsnotify_backend.h | 1 + + 3 files changed, 13 insertions(+), 8 deletions(-) + +--- a/fs/notify/fsnotify.c ++++ b/fs/notify/fsnotify.c +@@ -388,7 +388,7 @@ static struct fsnotify_mark *fsnotify_fi + return hlist_entry_safe(node, struct fsnotify_mark, obj_list); + } + +-static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) ++struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) + { + struct hlist_node *node = NULL; + +--- a/fs/notify/mark.c ++++ b/fs/notify/mark.c +@@ -457,9 +457,6 @@ EXPORT_SYMBOL_GPL(fsnotify_put_mark); + */ + static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) + { +- if (!mark) +- return true; +- + if (refcount_inc_not_zero(&mark->refcnt)) { + spin_lock(&mark->lock); + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { +@@ -500,15 +497,22 @@ bool fsnotify_prepare_user_wait(struct f + int type; + + fsnotify_foreach_iter_type(type) { ++ struct fsnotify_mark *mark = iter_info->marks[type]; ++ + /* This can fail if mark is being removed */ +- if (!fsnotify_get_mark_safe(iter_info->marks[type])) { +- __release(&fsnotify_mark_srcu); +- goto fail; ++ while (mark && !fsnotify_get_mark_safe(mark)) { ++ if (mark->group == iter_info->current_group) { ++ __release(&fsnotify_mark_srcu); ++ goto fail; ++ } ++ /* This is a mark in an unrelated group, skip */ ++ mark = fsnotify_next_mark(mark); ++ iter_info->marks[type] = mark; + } + } + + /* +- * Now that both marks are pinned by refcount in the inode / vfsmount ++ * Now that all marks are pinned by refcount in the inode / vfsmount / etc + * lists, we can drop SRCU lock, and safely resume 
the list iteration + * once userspace returns. + */ +--- a/include/linux/fsnotify_backend.h ++++ b/include/linux/fsnotify_backend.h +@@ -915,6 +915,7 @@ extern void fsnotify_clear_marks_by_grou + unsigned int obj_type); + extern void fsnotify_get_mark(struct fsnotify_mark *mark); + extern void fsnotify_put_mark(struct fsnotify_mark *mark); ++struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark); + extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); + extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); + diff --git a/queue-7.0/kvm-arm64-fix-kvm_vcpu_initialized-macro-parameter.patch b/queue-7.0/kvm-arm64-fix-kvm_vcpu_initialized-macro-parameter.patch new file mode 100644 index 0000000000..96cfd104e4 --- /dev/null +++ b/queue-7.0/kvm-arm64-fix-kvm_vcpu_initialized-macro-parameter.patch @@ -0,0 +1,36 @@ +From d89fdda7dd8a488f922e1175e6782f781ba8a23b Mon Sep 17 00:00:00 2001 +From: Fuad Tabba +Date: Fri, 24 Apr 2026 09:49:06 +0100 +Subject: KVM: arm64: Fix kvm_vcpu_initialized() macro parameter + +From: Fuad Tabba + +commit d89fdda7dd8a488f922e1175e6782f781ba8a23b upstream. + +The macro is defined with parameter 'v' but the body references the +literal token 'vcpu' instead, causing it to silently operate on whatever +'vcpu' resolves to in the caller's scope rather than the value passed by +the caller. All current call sites happen to use a variable named 'vcpu', +so the bug is latent. 
+ +Fixes: e016333745c7 ("KVM: arm64: Only reset vCPU-scoped feature ID regs once") +Signed-off-by: Fuad Tabba +Link: https://patch.msgid.link/20260424084908.370776-5-tabba@google.com +Signed-off-by: Marc Zyngier +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/include/asm/kvm_host.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/include/asm/kvm_host.h ++++ b/arch/arm64/include/asm/kvm_host.h +@@ -1506,7 +1506,7 @@ static inline bool __vcpu_has_feature(co + #define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f)) + #define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) + +-#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED) ++#define kvm_vcpu_initialized(v) vcpu_get_flag(v, VCPU_INITIALIZED) + + int kvm_trng_call(struct kvm_vcpu *vcpu); + #ifdef CONFIG_KVM diff --git a/queue-7.0/loongarch-fix-sym_sigfunc_start-definition-for-32bit.patch b/queue-7.0/loongarch-fix-sym_sigfunc_start-definition-for-32bit.patch new file mode 100644 index 0000000000..992f63daff --- /dev/null +++ b/queue-7.0/loongarch-fix-sym_sigfunc_start-definition-for-32bit.patch @@ -0,0 +1,33 @@ +From 98b8aebb14fdc0133939fd8fe07d0d98333dc976 Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Mon, 4 May 2026 09:00:01 +0800 +Subject: LoongArch: Fix SYM_SIGFUNC_START definition for 32BIT + +From: Huacai Chen + +commit 98b8aebb14fdc0133939fd8fe07d0d98333dc976 upstream. + +The SYM_SIGFUNC_START definition should match sigcontext that the length +of GPRs are 8 bytes for both 32BIT and 64BIT. So replace SZREG with 8 to +fix it. 
+ +Cc: stable@vger.kernel.org +Fixes: e4878c37f6679fde ("LoongArch: vDSO: Emit GNU_EH_FRAME correctly") +Suggested-by: Xi Ruoyao +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/include/asm/linkage.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/loongarch/include/asm/linkage.h ++++ b/arch/loongarch/include/asm/linkage.h +@@ -69,7 +69,7 @@ + 9, 10, 11, 12, 13, 14, 15, 16, \ + 17, 18, 19, 20, 21, 22, 23, 24, \ + 25, 26, 27, 28, 29, 30, 31; \ +- .cfi_offset \num, SC_REGS + \num * SZREG; \ ++ .cfi_offset \num, SC_REGS + \num * 8; \ + .endr; \ + \ + nop; \ diff --git a/queue-7.0/loongarch-kvm-compile-switch.s-directly-into-the-kernel.patch b/queue-7.0/loongarch-kvm-compile-switch.s-directly-into-the-kernel.patch new file mode 100644 index 0000000000..454e79837b --- /dev/null +++ b/queue-7.0/loongarch-kvm-compile-switch.s-directly-into-the-kernel.patch @@ -0,0 +1,264 @@ +From 5203012fa6045aac4b69d4e7c212e16dcf38ef10 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 4 May 2026 09:00:37 +0800 +Subject: LoongArch: KVM: Compile switch.S directly into the kernel + +From: Xianglai Li + +commit 5203012fa6045aac4b69d4e7c212e16dcf38ef10 upstream. + +If we directly compile the switch.S file into the kernel, the address of +the kvm_exc_entry function will definitely be within the DMW memory area. +Therefore, we will no longer need to perform a copy relocation of the +kvm_exc_entry. + +So this patch compiles switch.S directly into the kernel, and then remove +the copy relocation execution logic for the kvm_exc_entry function. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Xianglai Li +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/Kbuild | 2 - + arch/loongarch/include/asm/asm-prototypes.h | 20 ++++++++++++++++ + arch/loongarch/include/asm/kvm_host.h | 3 -- + arch/loongarch/kvm/Makefile | 3 +- + arch/loongarch/kvm/main.c | 35 ++-------------------------- + arch/loongarch/kvm/switch.S | 20 ++++++++++++---- + 6 files changed, 41 insertions(+), 42 deletions(-) + +--- a/arch/loongarch/Kbuild ++++ b/arch/loongarch/Kbuild +@@ -3,7 +3,7 @@ obj-y += mm/ + obj-y += net/ + obj-y += vdso/ + +-obj-$(CONFIG_KVM) += kvm/ ++obj-$(subst m,y,$(CONFIG_KVM)) += kvm/ + + # for cleaning + subdir- += boot +--- a/arch/loongarch/include/asm/asm-prototypes.h ++++ b/arch/loongarch/include/asm/asm-prototypes.h +@@ -20,3 +20,23 @@ asmlinkage void noinstr __no_stack_prote + struct pt_regs *regs, + int (*fn)(void *), + void *fn_arg); ++ ++struct kvm_run; ++struct kvm_vcpu; ++struct loongarch_fpu; ++ ++void kvm_exc_entry(void); ++int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu); ++ ++void kvm_save_fpu(struct loongarch_fpu *fpu); ++void kvm_restore_fpu(struct loongarch_fpu *fpu); ++ ++#ifdef CONFIG_CPU_HAS_LSX ++void kvm_save_lsx(struct loongarch_fpu *fpu); ++void kvm_restore_lsx(struct loongarch_fpu *fpu); ++#endif ++ ++#ifdef CONFIG_CPU_HAS_LASX ++void kvm_save_lasx(struct loongarch_fpu *fpu); ++void kvm_restore_lasx(struct loongarch_fpu *fpu); ++#endif +--- a/arch/loongarch/include/asm/kvm_host.h ++++ b/arch/loongarch/include/asm/kvm_host.h +@@ -86,7 +86,6 @@ struct kvm_context { + struct kvm_world_switch { + int (*exc_entry)(void); + int (*enter_guest)(struct kvm_run *run, struct kvm_vcpu *vcpu); +- unsigned long page_order; + }; + + #define MAX_PGTABLE_LEVELS 4 +@@ -356,8 +355,6 @@ void kvm_exc_entry(void); + int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu); + + extern unsigned long vpid_mask; +-extern const unsigned long kvm_exception_size; 
+-extern const unsigned long kvm_enter_guest_size; + extern struct kvm_world_switch *kvm_loongarch_ops; + + #define SW_GCSR (1 << 0) +--- a/arch/loongarch/kvm/Makefile ++++ b/arch/loongarch/kvm/Makefile +@@ -7,11 +7,12 @@ include $(srctree)/virt/kvm/Makefile.kvm + + obj-$(CONFIG_KVM) += kvm.o + ++obj-y += switch.o ++ + kvm-y += exit.o + kvm-y += interrupt.o + kvm-y += main.o + kvm-y += mmu.o +-kvm-y += switch.o + kvm-y += timer.o + kvm-y += tlb.o + kvm-y += vcpu.o +--- a/arch/loongarch/kvm/main.c ++++ b/arch/loongarch/kvm/main.c +@@ -348,8 +348,7 @@ void kvm_arch_disable_virtualization_cpu + + static int kvm_loongarch_env_init(void) + { +- int cpu, order, ret; +- void *addr; ++ int cpu, ret; + struct kvm_context *context; + + vmcs = alloc_percpu(struct kvm_context); +@@ -365,30 +364,8 @@ static int kvm_loongarch_env_init(void) + return -ENOMEM; + } + +- /* +- * PGD register is shared between root kernel and kvm hypervisor. +- * So world switch entry should be in DMW area rather than TLB area +- * to avoid page fault reenter. +- * +- * In future if hardware pagetable walking is supported, we won't +- * need to copy world switch code to DMW area. 
+- */ +- order = get_order(kvm_exception_size + kvm_enter_guest_size); +- addr = (void *)__get_free_pages(GFP_KERNEL, order); +- if (!addr) { +- free_percpu(vmcs); +- vmcs = NULL; +- kfree(kvm_loongarch_ops); +- kvm_loongarch_ops = NULL; +- return -ENOMEM; +- } +- +- memcpy(addr, kvm_exc_entry, kvm_exception_size); +- memcpy(addr + kvm_exception_size, kvm_enter_guest, kvm_enter_guest_size); +- flush_icache_range((unsigned long)addr, (unsigned long)addr + kvm_exception_size + kvm_enter_guest_size); +- kvm_loongarch_ops->exc_entry = addr; +- kvm_loongarch_ops->enter_guest = addr + kvm_exception_size; +- kvm_loongarch_ops->page_order = order; ++ kvm_loongarch_ops->exc_entry = (void *)kvm_exc_entry; ++ kvm_loongarch_ops->enter_guest = (void *)kvm_enter_guest; + + vpid_mask = read_csr_gstat(); + vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT; +@@ -422,16 +399,10 @@ static int kvm_loongarch_env_init(void) + + static void kvm_loongarch_env_exit(void) + { +- unsigned long addr; +- + if (vmcs) + free_percpu(vmcs); + + if (kvm_loongarch_ops) { +- if (kvm_loongarch_ops->exc_entry) { +- addr = (unsigned long)kvm_loongarch_ops->exc_entry; +- free_pages(addr, kvm_loongarch_ops->page_order); +- } + kfree(kvm_loongarch_ops); + } + +--- a/arch/loongarch/kvm/switch.S ++++ b/arch/loongarch/kvm/switch.S +@@ -4,9 +4,11 @@ + */ + + #include ++#include + #include + #include + #include ++#include + #include + #include + +@@ -100,8 +102,13 @@ + * - is still in guest mode, such as pgd table/vmid registers etc, + * - will fix with hw page walk enabled in future + * load kvm_vcpu from reserved CSR KVM_VCPU_KS, and save a2 to KVM_TEMP_KS ++ * ++ * PGD register is shared between root kernel and kvm hypervisor. ++ * So world switch entry should be in DMW area rather than TLB area ++ * to avoid page fault re-enter. 
+ */ + .text ++ .p2align PAGE_SHIFT + .cfi_sections .debug_frame + SYM_CODE_START(kvm_exc_entry) + UNWIND_HINT_UNDEFINED +@@ -190,8 +197,8 @@ ret_to_host: + kvm_restore_host_gpr a2 + jr ra + +-SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL) + SYM_CODE_END(kvm_exc_entry) ++EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry) + + /* + * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu) +@@ -215,8 +222,8 @@ SYM_FUNC_START(kvm_enter_guest) + /* Save kvm_vcpu to kscratch */ + csrwr a1, KVM_VCPU_KS + kvm_switch_to_guest +-SYM_INNER_LABEL(kvm_enter_guest_end, SYM_L_LOCAL) + SYM_FUNC_END(kvm_enter_guest) ++EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest) + + SYM_FUNC_START(kvm_save_fpu) + fpu_save_csr a0 t1 +@@ -224,6 +231,7 @@ SYM_FUNC_START(kvm_save_fpu) + fpu_save_cc a0 t1 t2 + jr ra + SYM_FUNC_END(kvm_save_fpu) ++EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu) + + SYM_FUNC_START(kvm_restore_fpu) + fpu_restore_double a0 t1 +@@ -231,6 +239,7 @@ SYM_FUNC_START(kvm_restore_fpu) + fpu_restore_cc a0 t1 t2 + jr ra + SYM_FUNC_END(kvm_restore_fpu) ++EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu) + + #ifdef CONFIG_CPU_HAS_LSX + SYM_FUNC_START(kvm_save_lsx) +@@ -239,6 +248,7 @@ SYM_FUNC_START(kvm_save_lsx) + lsx_save_data a0 t1 + jr ra + SYM_FUNC_END(kvm_save_lsx) ++EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx) + + SYM_FUNC_START(kvm_restore_lsx) + lsx_restore_data a0 t1 +@@ -246,6 +256,7 @@ SYM_FUNC_START(kvm_restore_lsx) + fpu_restore_csr a0 t1 t2 + jr ra + SYM_FUNC_END(kvm_restore_lsx) ++EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx) + #endif + + #ifdef CONFIG_CPU_HAS_LASX +@@ -255,6 +266,7 @@ SYM_FUNC_START(kvm_save_lasx) + lasx_save_data a0 t1 + jr ra + SYM_FUNC_END(kvm_save_lasx) ++EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx) + + SYM_FUNC_START(kvm_restore_lasx) + lasx_restore_data a0 t1 +@@ -262,10 +274,8 @@ SYM_FUNC_START(kvm_restore_lasx) + fpu_restore_csr a0 t1 t2 + jr ra + SYM_FUNC_END(kvm_restore_lasx) ++EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx) + #endif +- .section ".rodata" +-SYM_DATA(kvm_exception_size, .quad 
kvm_exc_entry_end - kvm_exc_entry) +-SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) + + #ifdef CONFIG_CPU_HAS_LBT + STACK_FRAME_NON_STANDARD kvm_restore_fpu diff --git a/queue-7.0/mm-hugetlb_cma-round-up-per_node-before-logging-it.patch b/queue-7.0/mm-hugetlb_cma-round-up-per_node-before-logging-it.patch new file mode 100644 index 0000000000..fcb911901e --- /dev/null +++ b/queue-7.0/mm-hugetlb_cma-round-up-per_node-before-logging-it.patch @@ -0,0 +1,74 @@ +From 8f5ce56b76303c55b78a87af996e2e0f8535f979 Mon Sep 17 00:00:00 2001 +From: Sang-Heon Jeon +Date: Wed, 22 Apr 2026 23:33:53 +0900 +Subject: mm/hugetlb_cma: round up per_node before logging it + +From: Sang-Heon Jeon + +commit 8f5ce56b76303c55b78a87af996e2e0f8535f979 upstream. + +When the user requests a total hugetlb CMA size without per-node +specification, hugetlb_cma_reserve() computes per_node from +hugetlb_cma_size and the number of nodes that have memory + + per_node = DIV_ROUND_UP(hugetlb_cma_size, + nodes_weight(hugetlb_bootmem_nodes)); + +The reservation loop later computes + + size = round_up(min(per_node, hugetlb_cma_size - reserved), + PAGE_SIZE << order); + +So the actually reserved per_node size is multiple of (PAGE_SIZE << +order), but the logged per_node is not rounded up, so it may be smaller +than the actual reserved size. + +For example, as the existing comment describes, if a 3 GB area is +requested on a machine with 4 NUMA nodes that have memory, 1 GB is +allocated on the first three nodes, but the printed log is + + hugetlb_cma: reserve 3072 MiB, up to 768 MiB per node + +Round per_node up to (PAGE_SIZE << order) before logging so that the +printed log always matches the actual reserved size. No functional change +to the actual reservation size, as the following case analysis shows + +1. 
remaining (hugetlb_cma_size - reserved) >= rounded per_node + - AS-IS: min() picks unrounded per_node; + round_up() returns rounded per_node + - TO-BE: min() picks rounded per_node; + round_up() returns rounded per_node (no-op) +2. remaining < unrounded per_node + - AS-IS: min() picks remaining; + round_up() returns round_up(remaining) + - TO-BE: min() picks remaining; + round_up() returns round_up(remaining) +3. unrounded per_node <= remaining < rounded per_node + - AS-IS: min() picks unrounded per_node; + round_up() returns rounded per_node + - TO-BE: min() picks remaining; + round_up() returns round_up(remaining) equals rounded per_node + +Link: https://lore.kernel.org/20260422143353.852257-1-ekffu200098@gmail.com +Fixes: cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma") # 5.7 +Signed-off-by: Sang-Heon Jeon +Reviewed-by: Muchun Song +Cc: David Hildenbrand +Cc: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb_cma.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/hugetlb_cma.c ++++ b/mm/hugetlb_cma.c +@@ -204,6 +204,7 @@ void __init hugetlb_cma_reserve(void) + */ + per_node = DIV_ROUND_UP(hugetlb_cma_size, + nodes_weight(hugetlb_bootmem_nodes)); ++ per_node = round_up(per_node, PAGE_SIZE << order); + pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", + hugetlb_cma_size / SZ_1M, per_node / SZ_1M); + } diff --git a/queue-7.0/mptcp-pm-add_addr-rtx-skip-inactive-subflows.patch b/queue-7.0/mptcp-pm-add_addr-rtx-skip-inactive-subflows.patch new file mode 100644 index 0000000000..ae7516117c --- /dev/null +++ b/queue-7.0/mptcp-pm-add_addr-rtx-skip-inactive-subflows.patch @@ -0,0 +1,64 @@ +From c6d395e2de1306b5fef0344a3c3835fbbfaa18be Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 5 May 2026 17:00:55 +0200 +Subject: mptcp: pm: ADD_ADDR rtx: skip inactive subflows + +From: Matthieu Baerts (NGI0) + +commit c6d395e2de1306b5fef0344a3c3835fbbfaa18be 
upstream. + +When looking at the maximum RTO amongst the subflows, inactive subflows +were taken into account: that includes stale ones, and the initial one +if it has already been closed. + +Unusable subflows are now simply skipped. Stale ones are used as an +alternative: if there are only stale ones, to take their maximum RTO and +avoid to eventually fallback to net.mptcp.add_addr_timeout, which is set +to 2 minutes by default. + +Fixes: 30549eebc4d8 ("mptcp: make ADD_ADDR retransmission timeout adaptive") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-7-fca8091060a4@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/net/mptcp/pm.c ++++ b/net/mptcp/pm.c +@@ -305,18 +305,28 @@ static unsigned int mptcp_adjust_add_add + const struct net *net = sock_net((struct sock *)msk); + unsigned int rto = mptcp_get_add_addr_timeout(net); + struct mptcp_subflow_context *subflow; +- unsigned int max = 0; ++ unsigned int max = 0, max_stale = 0; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct inet_connection_sock *icsk = inet_csk(ssk); + +- if (icsk->icsk_rto > max) ++ if (!__mptcp_subflow_active(subflow)) ++ continue; ++ ++ if (unlikely(subflow->stale)) { ++ if (icsk->icsk_rto > max_stale) ++ max_stale = icsk->icsk_rto; ++ } else if (icsk->icsk_rto > max) { + max = icsk->icsk_rto; ++ } + } + +- if (max && max < rto) +- rto = max; ++ if (max) ++ return min(max, rto); ++ ++ if (max_stale) ++ return min(max_stale, rto); + + return rto; + } diff --git a/queue-7.0/mtd-spi-nor-debugfs-fix-out-of-bounds-read-in-spi_nor_params_show.patch b/queue-7.0/mtd-spi-nor-debugfs-fix-out-of-bounds-read-in-spi_nor_params_show.patch new file mode 100644 index 0000000000..600b0eedbf --- 
/dev/null +++ b/queue-7.0/mtd-spi-nor-debugfs-fix-out-of-bounds-read-in-spi_nor_params_show.patch @@ -0,0 +1,60 @@ +From e47029b977e747cb3a9174308fd55762cce70147 Mon Sep 17 00:00:00 2001 +From: Tudor Ambarus +Date: Fri, 17 Apr 2026 15:24:39 +0000 +Subject: mtd: spi-nor: debugfs: fix out-of-bounds read in spi_nor_params_show() + +From: Tudor Ambarus + +commit e47029b977e747cb3a9174308fd55762cce70147 upstream. + +Sashiko noticed an out-of-bounds read [1]. + +In spi_nor_params_show(), the snor_f_names array is passed to +spi_nor_print_flags() using sizeof(snor_f_names). + +Since snor_f_names is an array of pointers, sizeof() returns the total +number of bytes occupied by the pointers + (element_count * sizeof(void *)) +rather than the element count itself. On 64-bit systems, this makes the +passed length 8x larger than intended. + +Inside spi_nor_print_flags(), the 'names_len' argument is used to +bounds-check the 'names' array access. An out-of-bounds read occurs +if a flag bit is set that exceeds the array's actual element count +but is within the inflated byte-size count. + +Correct this by using ARRAY_SIZE() to pass the actual number of +string pointers in the array. 
+ +Cc: stable@vger.kernel.org +Fixes: 0257be79fc4a ("mtd: spi-nor: expose internal parameters via debugfs") +Closes: https://sashiko.dev/#/patchset/20260417-die-erase-fix-v2-1-73bb7004ebad%40infineon.com [1] +Signed-off-by: Tudor Ambarus +Reviewed-by: Takahiro Kuwano +Reviewed-by: Michael Walle +Reviewed-by: Pratyush Yadav +Signed-off-by: Miquel Raynal +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mtd/spi-nor/debugfs.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/mtd/spi-nor/debugfs.c ++++ b/drivers/mtd/spi-nor/debugfs.c +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + ++#include + #include + #include + #include +@@ -92,7 +93,8 @@ static int spi_nor_params_show(struct se + seq_printf(s, "address nbytes\t%u\n", nor->addr_nbytes); + + seq_puts(s, "flags\t\t"); +- spi_nor_print_flags(s, nor->flags, snor_f_names, sizeof(snor_f_names)); ++ spi_nor_print_flags(s, nor->flags, snor_f_names, ++ ARRAY_SIZE(snor_f_names)); + seq_puts(s, "\n"); + + seq_puts(s, "\nopcodes\n"); diff --git a/queue-7.0/net-rtnetlink-zero-ifla_vf_broadcast-to-avoid-stack-infoleak-in-rtnl_fill_vfinfo.patch b/queue-7.0/net-rtnetlink-zero-ifla_vf_broadcast-to-avoid-stack-infoleak-in-rtnl_fill_vfinfo.patch new file mode 100644 index 0000000000..b9acc07c01 --- /dev/null +++ b/queue-7.0/net-rtnetlink-zero-ifla_vf_broadcast-to-avoid-stack-infoleak-in-rtnl_fill_vfinfo.patch @@ -0,0 +1,74 @@ +From 4b9e327991815e128ad3af75c3a04630a63ce3e0 Mon Sep 17 00:00:00 2001 +From: Kai Zen +Date: Thu, 30 Apr 2026 18:26:48 +0300 +Subject: net: rtnetlink: zero ifla_vf_broadcast to avoid stack infoleak in rtnl_fill_vfinfo + +From: Kai Zen + +commit 4b9e327991815e128ad3af75c3a04630a63ce3e0 upstream. 
+ +rtnl_fill_vfinfo() declares struct ifla_vf_broadcast on the stack +without initialisation: + + struct ifla_vf_broadcast vf_broadcast; + +The struct contains a single fixed 32-byte field: + + /* include/uapi/linux/if_link.h */ + struct ifla_vf_broadcast { + __u8 broadcast[32]; + }; + +The function then copies dev->broadcast into it using dev->addr_len +as the length: + + memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); + +On Ethernet devices (the overwhelming majority of SR-IOV NICs) +dev->addr_len is 6, so only the first 6 bytes of broadcast[] are +written. The remaining 26 bytes retain whatever was previously on +the kernel stack. The full struct is then handed to userspace via: + + nla_put(skb, IFLA_VF_BROADCAST, + sizeof(vf_broadcast), &vf_broadcast) + +leaking up to 26 bytes of uninitialised kernel stack per VF per +RTM_GETLINK request, repeatable. + +The other vf_* structs in the same function are explicitly zeroed +for exactly this reason - see the memset() calls for ivi, +vf_vlan_info, node_guid and port_guid a few lines above. +vf_broadcast was simply missed when it was added. + +Reachability: any unprivileged local process can open AF_NETLINK / +NETLINK_ROUTE without capabilities and send RTM_GETLINK with an +IFLA_EXT_MASK attribute carrying RTEXT_FILTER_VF. The kernel walks +each VF and emits IFLA_VF_BROADCAST, leaking 26 bytes of stack per +VF per request. Stack residue at this call site can include return +addresses and transient sensitive data; KASAN with stack +instrumentation, or KMSAN, will flag the nla_put() when reproduced. + +Zero the on-stack struct before the partial memcpy, matching the +existing pattern used for the other vf_* structs in the same +function. 
+ +Fixes: 75345f888f70 ("ipoib: show VF broadcast address") +Cc: stable@vger.kernel.org +Signed-off-by: Kai Zen +Link: https://patch.msgid.link/3c506e8f936e52b57620269b55c348af05d413a2.1777557228.git.kai.aizen.dev@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1572,6 +1572,7 @@ static noinline_for_stack int rtnl_fill_ + port_guid.vf = ivi.vf; + + memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); ++ memset(&vf_broadcast, 0, sizeof(vf_broadcast)); + memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); + vf_vlan.vlan = ivi.vlan; + vf_vlan.qos = ivi.qos; diff --git a/queue-7.0/perf-x86-intel-improve-validation-and-configuration-of-acr-masks.patch b/queue-7.0/perf-x86-intel-improve-validation-and-configuration-of-acr-masks.patch new file mode 100644 index 0000000000..625370c8a6 --- /dev/null +++ b/queue-7.0/perf-x86-intel-improve-validation-and-configuration-of-acr-masks.patch @@ -0,0 +1,95 @@ +From 5ad732a56be46aabf158c16aa0c095291727aaef Mon Sep 17 00:00:00 2001 +From: Dapeng Mi +Date: Thu, 30 Apr 2026 08:25:54 +0800 +Subject: perf/x86/intel: Improve validation and configuration of ACR masks + +From: Dapeng Mi + +commit 5ad732a56be46aabf158c16aa0c095291727aaef upstream. + +Currently there are several issues on the user space ACR mask validation +and configuration. +- The validation for user space ACR mask (attr.config2) is incomplete, + e.g., the ACR mask could include the index which belongs to another + ACR events group, but it's not validated. +- An early return on an invalid ACR mask caused all subsequent ACR groups + to be skipped. +- The stale hardware ACR mask (hw.config1) is not cleared before setting + new hardware ACR mask. + +The following changes address all of the above issues. +- Figure out the event index group of an ACR group. 
Any bits in the + user-space mask not present in the index group are now dropped. +- Instead of an early return on invalid bits, drop only the invalid + portions and continue iterating through all ACR events to ensure full + configuration. +- Explicitly clear the stale hardware ACR mask for each event prior to + writing the new configuration. + +Besides, a non-leader event member of ACR group could be disabled in +theory. This could cause bit-shifting errors in the acr_mask of remaining +group members. But since ACR sampling requires all events to be active, +this should not be a big concern in real use case. Add a "FIXME" comment +to notice this risk. + +Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload") +Signed-off-by: Dapeng Mi +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260430002558.712334-2-dapeng1.mi@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/core.c | 32 +++++++++++++++++++++++++------- + 1 file changed, 25 insertions(+), 7 deletions(-) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -3332,23 +3332,41 @@ static void intel_pmu_enable_event(struc + static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc) + { + struct perf_event *event, *leader; +- int i, j, idx; ++ int i, j, k, bit, idx; + ++ /* ++ * FIXME: ACR mask parsing relies on cpuc->event_list[] (active events only). ++ * Disabling an ACR event causes bit-shifting errors in the acr_mask of ++ * remaining group members. As ACR sampling requires all events to be active, ++ * this limitation is acceptable for now. Revisit if independent event toggling ++ * is required. ++ */ + for (i = 0; i < cpuc->n_events; i++) { + leader = cpuc->event_list[i]; + if (!is_acr_event_group(leader)) + continue; + +- /* The ACR events must be contiguous. */ ++ /* Find the last event of the ACR group. 
*/ + for (j = i; j < cpuc->n_events; j++) { + event = cpuc->event_list[j]; + if (event->group_leader != leader->group_leader) + break; +- for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { +- if (i + idx >= cpuc->n_events || +- !is_acr_event_group(cpuc->event_list[i + idx])) +- return; +- __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1); ++ } ++ ++ /* ++ * Translate the user-space ACR mask (attr.config2) into the physical ++ * counter bitmask (hw.config1) for each ACR event in the group. ++ * NOTE: ACR event contiguity is guaranteed by intel_pmu_hw_config(). ++ */ ++ for (k = i; k < j; k++) { ++ event = cpuc->event_list[k]; ++ event->hw.config1 = 0; ++ for_each_set_bit(bit, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { ++ idx = i + bit; ++ /* Event index of ACR group must locate in [i, j). */ ++ if (idx >= j || !is_acr_event_group(cpuc->event_list[idx])) ++ continue; ++ __set_bit(cpuc->assign[idx], (unsigned long *)&event->hw.config1); + } + } + i = j - 1; diff --git a/queue-7.0/rseq-don-t-advertise-time-slice-extensions-if-disabled.patch b/queue-7.0/rseq-don-t-advertise-time-slice-extensions-if-disabled.patch new file mode 100644 index 0000000000..994a0dabde --- /dev/null +++ b/queue-7.0/rseq-don-t-advertise-time-slice-extensions-if-disabled.patch @@ -0,0 +1,67 @@ +From 010b7723c0a3b9ad58f50b715dbe2e7781d29400 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 28 Apr 2026 09:34:45 +0200 +Subject: rseq: Don't advertise time slice extensions if disabled + +From: Thomas Gleixner + +commit 010b7723c0a3b9ad58f50b715dbe2e7781d29400 upstream. + +If time slice extensions have been disabled on the kernel command line, +then advertising them in RSEQ flags is wrong. + +Adjust the conditionals to reflect reality, fixup the misleading comments +about the gap of these flags and the rseq::flags field. 
+ +Fixes: d6200245c75e ("rseq: Allow registering RSEQ with slice extension") +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Link: https://patch.msgid.link/20260428224427.437059375%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/rseq.h | 5 ++++- + kernel/rseq.c | 9 +++++---- + 2 files changed, 9 insertions(+), 5 deletions(-) + +--- a/include/uapi/linux/rseq.h ++++ b/include/uapi/linux/rseq.h +@@ -28,7 +28,7 @@ enum rseq_cs_flags_bit { + RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, + RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, + RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, +- /* (3) Intentional gap to put new bits into a separate byte */ ++ /* (3) Intentional gap to keep new bits separate */ + + /* User read only feature flags */ + RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT = 4, +@@ -161,6 +161,9 @@ struct rseq { + * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT + * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL + * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE ++ * ++ * It is now used for feature status advertisement by the kernel. ++ * See: enum rseq_cs_flags_bit for further information. 
+ */ + __u32 flags; + +--- a/kernel/rseq.c ++++ b/kernel/rseq.c +@@ -462,10 +462,11 @@ SYSCALL_DEFINE4(rseq, struct rseq __user + return -EFAULT; + + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) { +- rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; +- if (rseq_slice_extension_enabled() && +- (flags & RSEQ_FLAG_SLICE_EXT_DEFAULT_ON)) +- rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED; ++ if (rseq_slice_extension_enabled()) { ++ rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; ++ if (flags & RSEQ_FLAG_SLICE_EXT_DEFAULT_ON) ++ rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED; ++ } + } + + scoped_user_write_access(rseq, efault) { diff --git a/queue-7.0/rseq-protect-rseq_reset-against-interrupts.patch b/queue-7.0/rseq-protect-rseq_reset-against-interrupts.patch new file mode 100644 index 0000000000..e6043bc46f --- /dev/null +++ b/queue-7.0/rseq-protect-rseq_reset-against-interrupts.patch @@ -0,0 +1,38 @@ +From e9766e6f7d330dce7530918d8c6e3ec96d6c6e24 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 28 Apr 2026 10:14:41 +0200 +Subject: rseq: Protect rseq_reset() against interrupts + +From: Thomas Gleixner + +commit e9766e6f7d330dce7530918d8c6e3ec96d6c6e24 upstream. + +rseq_reset() uses memset() to clear the task's rseq data. That's racy +against membarrier() and preemption. + +Guard it with irqsave to cure this. 
+ +Fixes: faba9d250eae ("rseq: Introduce struct rseq_data") +Reported-by: Dmitry Vyukov +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Link: https://patch.msgid.link/20260428224427.353887714%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/rseq.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/linux/rseq.h ++++ b/include/linux/rseq.h +@@ -119,6 +119,8 @@ static inline void rseq_virt_userspace_e + + static inline void rseq_reset(struct task_struct *t) + { ++ /* Protect against preemption and membarrier IPI */ ++ guard(irqsave)(); + memset(&t->rseq, 0, sizeof(t->rseq)); + t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED; + } diff --git a/queue-7.0/rseq-set-rseq-cpu_id_start-to-0-on-unregistration.patch b/queue-7.0/rseq-set-rseq-cpu_id_start-to-0-on-unregistration.patch new file mode 100644 index 0000000000..0ed909f81b --- /dev/null +++ b/queue-7.0/rseq-set-rseq-cpu_id_start-to-0-on-unregistration.patch @@ -0,0 +1,68 @@ +From 2cb68e45120dfc66404c7547d95b8ac6ff0b25ce Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Tue, 28 Apr 2026 10:10:19 +0200 +Subject: rseq: Set rseq::cpu_id_start to 0 on unregistration + +From: Thomas Gleixner + +commit 2cb68e45120dfc66404c7547d95b8ac6ff0b25ce upstream. + +The RSEQ rework changed that to RSEQ_CPU_ID_UNINITIALIZED, which is obviously +incompatible. Revert back to the original behavior. 
+ +Fixes: 0f085b41880e ("rseq: Provide and use rseq_set_ids()") +Reported-by: Dmitry Vyukov +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Link: https://patch.msgid.link/20260428224427.271566313%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + kernel/rseq.c | 20 +++++++++----------- + 1 file changed, 9 insertions(+), 11 deletions(-) + +--- a/kernel/rseq.c ++++ b/kernel/rseq.c +@@ -236,11 +236,6 @@ static int __init rseq_debugfs_init(void + } + __initcall(rseq_debugfs_init); + +-static bool rseq_set_ids(struct task_struct *t, struct rseq_ids *ids, u32 node_id) +-{ +- return rseq_set_ids_get_csaddr(t, ids, node_id, NULL); +-} +- + static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs) + { + struct rseq __user *urseq = t->rseq.usrptr; +@@ -384,19 +379,22 @@ void rseq_syscall(struct pt_regs *regs) + + static bool rseq_reset_ids(void) + { +- struct rseq_ids ids = { +- .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, +- .mm_cid = 0, +- }; ++ struct rseq __user *rseq = current->rseq.usrptr; + + /* + * If this fails, terminate it because this leaves the kernel in + * stupid state as exit to user space will try to fixup the ids + * again. 
+ */ +- if (rseq_set_ids(current, &ids, 0)) +- return true; ++ scoped_user_rw_access(rseq, efault) { ++ unsafe_put_user(0, &rseq->cpu_id_start, efault); ++ unsafe_put_user(RSEQ_CPU_ID_UNINITIALIZED, &rseq->cpu_id, efault); ++ unsafe_put_user(0, &rseq->node_id, efault); ++ unsafe_put_user(0, &rseq->mm_cid, efault); ++ } ++ return true; + ++efault: + force_sig(SIGSEGV); + return false; + } diff --git a/queue-7.0/selftests-rseq-don-t-run-tests-with-runner-scripts-outside-of-the-scripts.patch b/queue-7.0/selftests-rseq-don-t-run-tests-with-runner-scripts-outside-of-the-scripts.patch new file mode 100644 index 0000000000..f9264abe68 --- /dev/null +++ b/queue-7.0/selftests-rseq-don-t-run-tests-with-runner-scripts-outside-of-the-scripts.patch @@ -0,0 +1,61 @@ +From cb48828f06afa232cc330f0f4d6be101067810b3 Mon Sep 17 00:00:00 2001 +From: Mark Brown +Date: Thu, 23 Apr 2026 20:17:45 +0100 +Subject: selftests/rseq: Don't run tests with runner scripts outside of the scripts + +From: Mark Brown + +commit cb48828f06afa232cc330f0f4d6be101067810b3 upstream. + +The rseq selftests include two runner scripts run_param_test.sh and +run_syscall_errors_test.sh which set up the environment for test binaries +and run them with various parameters. Currently we list these test binaries +in TEST_GEN_PROGS but this results in the kselftest framework running them +directly as well as via the runners, resulting in duplication and spurious +failures when the environment is not correctly set up (eg, if glibc tries +to use rseq). + +Move the binaries the runners invoke to TEST_GEN_PROGS_EXTENDED, binaries +listed there are built but not run by the framework. The param_test +benchmarks are not moved since they are not run by run_param_test.sh. 
+ +Fixes: 830969e7821a ("selftests/rseq: Implement time slice extension test") + +Signed-off-by: Mark Brown +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Link: https://patch.msgid.link/20260423-selftests-rseq-use-runner-v1-1-e13a133754c1@kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/rseq/Makefile | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile +index 4ef90823b652..0d1947c0d623 100644 +--- a/tools/testing/selftests/rseq/Makefile ++++ b/tools/testing/selftests/rseq/Makefile +@@ -14,12 +14,15 @@ LDLIBS += -lpthread -ldl + # still track changes to header files and depend on shared object. + OVERRIDE_TARGETS = 1 + +-TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ +- param_test_benchmark param_test_compare_twice param_test_mm_cid \ +- param_test_mm_cid_benchmark param_test_mm_cid_compare_twice \ +- syscall_errors_test slice_test ++TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test \ ++ param_test_benchmark param_test_mm_cid_benchmark slice_test + +-TEST_GEN_PROGS_EXTENDED = librseq.so ++TEST_GEN_PROGS_EXTENDED = librseq.so \ ++ param_test \ ++ param_test_compare_twice \ ++ param_test_mm_cid \ ++ param_test_mm_cid_compare_twice \ ++ syscall_errors_test + + TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh + +-- +2.54.0 + diff --git a/queue-7.0/selftests-rseq-expand-for-optimized-rseq-abi-v2.patch b/queue-7.0/selftests-rseq-expand-for-optimized-rseq-abi-v2.patch new file mode 100644 index 0000000000..1a94980350 --- /dev/null +++ b/queue-7.0/selftests-rseq-expand-for-optimized-rseq-abi-v2.patch @@ -0,0 +1,297 @@ +From e744060076871eebc2647b24420b550ff44b2b65 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 25 Apr 2026 14:48:23 +0200 +Subject: selftests/rseq: Expand for optimized RSEQ ABI 
v2 + +From: Thomas Gleixner + +commit e744060076871eebc2647b24420b550ff44b2b65 upstream. + +Update the selftests so they are executed for legacy (32 bytes RSEQ region) +and optimized RSEQ ABI v2 mode. + +Fixes: d6200245c75e ("rseq: Allow registering RSEQ with slice extension") +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Link: https://patch.msgid.link/20260428224428.009121296%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/rseq/Makefile | 11 ++++-- + .../testing/selftests/rseq/check_optimized.c | 17 ++++++++ + tools/testing/selftests/rseq/param_test.c | 25 +++++++----- + .../testing/selftests/rseq/run_param_test.sh | 39 +++++++++++++++++++ + .../selftests/rseq/run_timeslice_test.sh | 14 +++++++ + tools/testing/selftests/rseq/slice_test.c | 2 +- + 6 files changed, 95 insertions(+), 13 deletions(-) + create mode 100644 tools/testing/selftests/rseq/check_optimized.c + create mode 100755 tools/testing/selftests/rseq/run_timeslice_test.sh + +diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile +index 0293a2f17f51..50d69e22ee7a 100644 +--- a/tools/testing/selftests/rseq/Makefile ++++ b/tools/testing/selftests/rseq/Makefile +@@ -15,7 +15,7 @@ LDLIBS += -lpthread -ldl + OVERRIDE_TARGETS = 1 + + TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test \ +- param_test_benchmark param_test_mm_cid_benchmark slice_test ++ param_test_benchmark param_test_mm_cid_benchmark + + TEST_GEN_PROGS_EXTENDED = librseq.so \ + param_test \ +@@ -23,9 +23,11 @@ TEST_GEN_PROGS_EXTENDED = librseq.so \ + param_test_mm_cid \ + param_test_mm_cid_compare_twice \ + syscall_errors_test \ +- legacy_check ++ legacy_check \ ++ slice_test \ ++ check_optimized + +-TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh run_legacy_check.sh ++TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh 
run_legacy_check.sh run_timeslice_test.sh + + TEST_FILES := settings + +@@ -66,3 +68,6 @@ $(OUTPUT)/syscall_errors_test: syscall_errors_test.c $(TEST_GEN_PROGS_EXTENDED) + + $(OUTPUT)/slice_test: slice_test.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h + $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ ++ ++$(OUTPUT)/check_optimized: check_optimized.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h ++ $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ +diff --git a/tools/testing/selftests/rseq/check_optimized.c b/tools/testing/selftests/rseq/check_optimized.c +new file mode 100644 +index 000000000000..a13e3f2c8fc6 +--- /dev/null ++++ b/tools/testing/selftests/rseq/check_optimized.c +@@ -0,0 +1,17 @@ ++// SPDX-License-Identifier: LGPL-2.1 ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "rseq.h" ++ ++int main(int argc, char **argv) ++{ ++ if (__rseq_register_current_thread(true, false)) ++ return -1; ++ return 0; ++} +diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c +index 05d03e679e06..e1e98dbabe4b 100644 +--- a/tools/testing/selftests/rseq/param_test.c ++++ b/tools/testing/selftests/rseq/param_test.c +@@ -38,7 +38,7 @@ static int opt_modulo, verbose; + static int opt_yield, opt_signal, opt_sleep, + opt_disable_rseq, opt_threads = 200, + opt_disable_mod = 0, opt_test = 's'; +- ++static bool opt_rseq_legacy; + static long long opt_reps = 5000; + + static __thread __attribute__((tls_model("initial-exec"))) +@@ -281,9 +281,12 @@ unsigned int yield_mod_cnt, nr_abort; + } \ + } + ++#define rseq_no_glibc true ++ + #else + + #define printf_verbose(fmt, ...) 
++#define rseq_no_glibc false + + #endif /* BENCHMARK */ + +@@ -481,7 +484,7 @@ void *test_percpu_spinlock_thread(void *arg) + long long i, reps; + + if (!opt_disable_rseq && thread_data->reg && +- rseq_register_current_thread()) ++ __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) + abort(); + reps = thread_data->reps; + for (i = 0; i < reps; i++) { +@@ -558,7 +561,7 @@ void *test_percpu_inc_thread(void *arg) + long long i, reps; + + if (!opt_disable_rseq && thread_data->reg && +- rseq_register_current_thread()) ++ __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) + abort(); + reps = thread_data->reps; + for (i = 0; i < reps; i++) { +@@ -712,7 +715,7 @@ void *test_percpu_list_thread(void *arg) + long long i, reps; + struct percpu_list *list = (struct percpu_list *)arg; + +- if (!opt_disable_rseq && rseq_register_current_thread()) ++ if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) + abort(); + + reps = opt_reps; +@@ -895,7 +898,7 @@ void *test_percpu_buffer_thread(void *arg) + long long i, reps; + struct percpu_buffer *buffer = (struct percpu_buffer *)arg; + +- if (!opt_disable_rseq && rseq_register_current_thread()) ++ if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) + abort(); + + reps = opt_reps; +@@ -1105,7 +1108,7 @@ void *test_percpu_memcpy_buffer_thread(void *arg) + long long i, reps; + struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; + +- if (!opt_disable_rseq && rseq_register_current_thread()) ++ if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) + abort(); + + reps = opt_reps; +@@ -1258,7 +1261,7 @@ void *test_membarrier_worker_thread(void *arg) + const int iters = opt_reps; + int i; + +- if (rseq_register_current_thread()) { ++ if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { + fprintf(stderr, "Error: rseq_register_current_thread(...) 
failed(%d): %s\n", + errno, strerror(errno)); + abort(); +@@ -1323,7 +1326,7 @@ void *test_membarrier_manager_thread(void *arg) + intptr_t expect_a = 0, expect_b = 0; + int cpu_a = 0, cpu_b = 0; + +- if (rseq_register_current_thread()) { ++ if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); +@@ -1475,6 +1478,7 @@ static void show_usage(int argc, char **argv) + printf(" [-D M] Disable rseq for each M threads\n"); + printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); + printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); ++ printf(" [-O] Test with optimized RSEQ\n"); + printf(" [-v] Verbose output.\n"); + printf(" [-h] Show this help.\n"); + printf("\n"); +@@ -1602,6 +1606,9 @@ int main(int argc, char **argv) + case 'M': + opt_mo = RSEQ_MO_RELEASE; + break; ++ case 'L': ++ opt_rseq_legacy = true; ++ break; + default: + show_usage(argc, argv); + goto error; +@@ -1618,7 +1625,7 @@ int main(int argc, char **argv) + if (set_signal_handler()) + goto error; + +- if (!opt_disable_rseq && rseq_register_current_thread()) ++ if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) + goto error; + if (!opt_disable_rseq && !rseq_validate_cpu_id()) { + fprintf(stderr, "Error: cpu id getter unavailable\n"); +diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh +index 8d31426ab41f..69a3fa049929 100755 +--- a/tools/testing/selftests/rseq/run_param_test.sh ++++ b/tools/testing/selftests/rseq/run_param_test.sh +@@ -34,6 +34,11 @@ REPS=1000 + SLOW_REPS=100 + NR_THREADS=$((6*${NR_CPUS})) + ++# Prevent GLIBC from registering RSEQ so the selftest can run in legacy and ++# performance optimized mode. 
++GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ++export GLIBC_TUNABLES ++ + function do_tests() + { + local i=0 +@@ -103,6 +108,40 @@ function inject_blocking() + NR_LOOPS= + } + ++echo "Testing in legacy RSEQ mode" ++echo "Yield injection (25%)" ++inject_blocking -m 4 -y -L ++ ++echo "Yield injection (50%)" ++inject_blocking -m 2 -y -L ++ ++echo "Yield injection (100%)" ++inject_blocking -m 1 -y -L ++ ++echo "Kill injection (25%)" ++inject_blocking -m 4 -k -L ++ ++echo "Kill injection (50%)" ++inject_blocking -m 2 -k -L ++ ++echo "Kill injection (100%)" ++inject_blocking -m 1 -k -L ++ ++echo "Sleep injection (1ms, 25%)" ++inject_blocking -m 4 -s 1 -L ++ ++echo "Sleep injection (1ms, 50%)" ++inject_blocking -m 2 -s 1 -L ++ ++echo "Sleep injection (1ms, 100%)" ++inject_blocking -m 1 -s 1 -L ++ ++./check_optimized || { ++ echo "Skipping optimized RSEQ mode test. Not supported"; ++ exit 0 ++} ++ ++echo "Testing in optimized RSEQ mode" + echo "Yield injection (25%)" + inject_blocking -m 4 -y + +diff --git a/tools/testing/selftests/rseq/run_timeslice_test.sh b/tools/testing/selftests/rseq/run_timeslice_test.sh +new file mode 100755 +index 000000000000..551ebed71ec6 +--- /dev/null ++++ b/tools/testing/selftests/rseq/run_timeslice_test.sh +@@ -0,0 +1,14 @@ ++#!/bin/bash ++# SPDX-License-Identifier: GPL-2.0+ ++ ++# Prevent GLIBC from registering RSEQ so the selftest can run in legacy ++# and performance optimized mode. ++GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ++export GLIBC_TUNABLES ++ ++./check_optimized || { ++ echo "Skipping optimized RSEQ mode test. 
Not supported"; ++ exit 0 ++} ++ ++./slice_test +diff --git a/tools/testing/selftests/rseq/slice_test.c b/tools/testing/selftests/rseq/slice_test.c +index 77e668ff74d7..e402d4440bc2 100644 +--- a/tools/testing/selftests/rseq/slice_test.c ++++ b/tools/testing/selftests/rseq/slice_test.c +@@ -124,7 +124,7 @@ FIXTURE_SETUP(slice_ext) + { + cpu_set_t affinity; + +- if (rseq_register_current_thread()) ++ if (__rseq_register_current_thread(true, false)) + SKIP(return, "RSEQ not supported\n"); + + if (prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET, +-- +2.54.0 + diff --git a/queue-7.0/selftests-rseq-make-registration-flexible-for-legacy-and-optimized-mode.patch b/queue-7.0/selftests-rseq-make-registration-flexible-for-legacy-and-optimized-mode.patch new file mode 100644 index 0000000000..ee72496f86 --- /dev/null +++ b/queue-7.0/selftests-rseq-make-registration-flexible-for-legacy-and-optimized-mode.patch @@ -0,0 +1,171 @@ +From d97cb2ef0b221b068e90b6058aa97faa0626bdab Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sun, 26 Apr 2026 18:13:54 +0200 +Subject: selftests/rseq: Make registration flexible for legacy and optimized mode + +From: Thomas Gleixner + +commit d97cb2ef0b221b068e90b6058aa97faa0626bdab upstream. + +rseq_register_current_thread() either uses the glibc registered RSEQ region +or registers its own region with the legacy size of 32 bytes. + +That worked so far, but becomes a problem when the kernel implements a +distinction between legacy and performance optimized behavior based on the +registration size as that does not allow to test both modes with the self +test suite. + +Add two arguments to the function. One to enforce that the registration is +not using libc provided mode and one to tell the registration to use the +legacy size and not the kernel advertised size. + +Rename it and make the original one an inline wrapper which preserves the +existing behavior. 
+ +Fixes: 566d8015f7ee ("rseq: Avoid CPU/MM CID updates when no event pending") +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Link: https://patch.msgid.link/20260428224427.677889423%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/rseq/rseq-abi.h | 7 ++++- + tools/testing/selftests/rseq/rseq.c | 39 ++++++++++++++------------------ + tools/testing/selftests/rseq/rseq.h | 8 +++++- + 3 files changed, 31 insertions(+), 23 deletions(-) + +--- a/tools/testing/selftests/rseq/rseq-abi.h ++++ b/tools/testing/selftests/rseq/rseq-abi.h +@@ -192,9 +192,14 @@ struct rseq_abi { + struct rseq_abi_slice_ctrl slice_ctrl; + + /* ++ * Place holder to push the size above 32 bytes. ++ */ ++ __u8 __reserved; ++ ++ /* + * Flexible array member at end of structure, after last feature field. + */ + char end[]; +-} __attribute__((aligned(4 * sizeof(__u64)))); ++} __attribute__((aligned(256))); + + #endif /* _RSEQ_ABI_H */ +--- a/tools/testing/selftests/rseq/rseq.c ++++ b/tools/testing/selftests/rseq/rseq.c +@@ -56,6 +56,7 @@ ptrdiff_t rseq_offset; + * unsuccessful. + */ + unsigned int rseq_size = -1U; ++static unsigned int rseq_alloc_size; + + /* Flags used during rseq registration. */ + unsigned int rseq_flags; +@@ -115,29 +116,17 @@ bool rseq_available(void) + } + } + +-/* The rseq areas need to be at least 32 bytes. */ +-static +-unsigned int get_rseq_min_alloc_size(void) +-{ +- unsigned int alloc_size = rseq_size; +- +- if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) +- alloc_size = ORIG_RSEQ_ALLOC_SIZE; +- return alloc_size; +-} +- + /* + * Return the feature size supported by the kernel. + * + * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): + * +- * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) ++ * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). 
+ * + * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. + */ +-static +-unsigned int get_rseq_kernel_feature_size(void) ++static unsigned int get_rseq_kernel_feature_size(void) + { + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + +@@ -152,15 +141,24 @@ unsigned int get_rseq_kernel_feature_siz + return ORIG_RSEQ_FEATURE_SIZE; + } + +-int rseq_register_current_thread(void) ++int __rseq_register_current_thread(bool nolibc, bool legacy) + { ++ unsigned int size; + int rc; + + if (!rseq_ownership) { + /* Treat libc's ownership as a successful registration. */ +- return 0; ++ return nolibc ? -EBUSY : 0; + } +- rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); ++ ++ /* The minimal allocation size is 32, which is the legacy allocation size */ ++ size = get_rseq_kernel_feature_size(); ++ if (legacy || size < ORIG_RSEQ_ALLOC_SIZE) ++ rseq_alloc_size = ORIG_RSEQ_ALLOC_SIZE; ++ else ++ rseq_alloc_size = size; ++ ++ rc = sys_rseq(&__rseq.abi, rseq_alloc_size, 0, RSEQ_SIG); + if (rc) { + /* + * After at least one thread has registered successfully +@@ -179,9 +177,8 @@ int rseq_register_current_thread(void) + * The first thread to register sets the rseq_size to mimic the libc + * behavior. + */ +- if (RSEQ_READ_ONCE(rseq_size) == 0) { +- RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size()); +- } ++ if (RSEQ_READ_ONCE(rseq_size) == 0) ++ RSEQ_WRITE_ONCE(rseq_size, size); + + return 0; + } +@@ -194,7 +191,7 @@ int rseq_unregister_current_thread(void) + /* Treat libc's ownership as a successful unregistration. 
*/ + return 0; + } +- rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); ++ rc = sys_rseq(&__rseq.abi, rseq_alloc_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + if (rc) + return -1; + return 0; +--- a/tools/testing/selftests/rseq/rseq.h ++++ b/tools/testing/selftests/rseq/rseq.h +@@ -8,6 +8,7 @@ + #ifndef RSEQ_H + #define RSEQ_H + ++#include + #include + #include + #include +@@ -142,7 +143,12 @@ static inline struct rseq_abi *rseq_get_ + * succeed. A restartable sequence executed from a non-registered + * thread will always fail. + */ +-int rseq_register_current_thread(void); ++int __rseq_register_current_thread(bool nolibc, bool legacy); ++ ++static inline int rseq_register_current_thread(void) ++{ ++ return __rseq_register_current_thread(false, false); ++} + + /* + * Unregister rseq for current thread. diff --git a/queue-7.0/selftests-rseq-skip-tests-if-time-slice-extensions-are-not-available.patch b/queue-7.0/selftests-rseq-skip-tests-if-time-slice-extensions-are-not-available.patch new file mode 100644 index 0000000000..0512bd6cba --- /dev/null +++ b/queue-7.0/selftests-rseq-skip-tests-if-time-slice-extensions-are-not-available.patch @@ -0,0 +1,52 @@ +From 02b44d943b3adddc3a15c1da97045e205b7d14c1 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sat, 25 Apr 2026 15:46:06 +0200 +Subject: selftests/rseq: Skip tests if time slice extensions are not available + +From: Thomas Gleixner + +commit 02b44d943b3adddc3a15c1da97045e205b7d14c1 upstream. + +Don't fail, skip the test if the extensions are not enabled at compile or +runtime. 
+ +Fixes: 830969e7821a ("selftests/rseq: Implement time slice extension test") +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Link: https://patch.msgid.link/20260428224427.597838491%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/rseq/slice_test.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/tools/testing/selftests/rseq/slice_test.c ++++ b/tools/testing/selftests/rseq/slice_test.c +@@ -124,6 +124,13 @@ FIXTURE_SETUP(slice_ext) + { + cpu_set_t affinity; + ++ if (rseq_register_current_thread()) ++ SKIP(return, "RSEQ not supported\n"); ++ ++ if (prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET, ++ PR_RSEQ_SLICE_EXT_ENABLE, 0, 0)) ++ SKIP(return, "Time slice extension not supported\n"); ++ + ASSERT_EQ(sched_getaffinity(0, sizeof(affinity), &affinity), 0); + + /* Pin it on a single CPU. Avoid CPU 0 */ +@@ -137,11 +144,6 @@ FIXTURE_SETUP(slice_ext) + break; + } + +- ASSERT_EQ(rseq_register_current_thread(), 0); +- +- ASSERT_EQ(prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET, +- PR_RSEQ_SLICE_EXT_ENABLE, 0, 0), 0); +- + self->noise_params.noise_nsecs = variant->noise_nsecs; + self->noise_params.sleep_nsecs = variant->sleep_nsecs; + self->noise_params.run = 1; diff --git a/queue-7.0/selftests-rseq-validate-legacy-behavior.patch b/queue-7.0/selftests-rseq-validate-legacy-behavior.patch new file mode 100644 index 0000000000..bdae24b49d --- /dev/null +++ b/queue-7.0/selftests-rseq-validate-legacy-behavior.patch @@ -0,0 +1,136 @@ +From fdf4eb632683bfc2840acebe62716cb468d43e10 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sun, 26 Apr 2026 17:51:07 +0200 +Subject: selftests/rseq: Validate legacy behavior + +From: Thomas Gleixner + +commit fdf4eb632683bfc2840acebe62716cb468d43e10 upstream. 
+ +The RSEQ legacy mode behavior requires that the ID fields in the rseq +region are unconditionally updated on every context switch and before +signal delivery even if not required by the ABI specification. + +To ensure that this behavior is preserved for legacy users in the future, +add a test which validates that with a sleep() and a signal sent to self. + +Provide a run script which prevents GLIBC from registering a RSEQ region, +so that the test can register its own legacy sized region. + +Fixes: 566d8015f7ee ("rseq: Avoid CPU/MM CID updates when no event pending") +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Link: https://patch.msgid.link/20260428224427.764705536%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/rseq/Makefile | 5 +- + tools/testing/selftests/rseq/legacy_check.c | 65 +++++++++++++++++++ + .../selftests/rseq/run_legacy_check.sh | 4 ++ + 3 files changed, 72 insertions(+), 2 deletions(-) + create mode 100644 tools/testing/selftests/rseq/legacy_check.c + create mode 100755 tools/testing/selftests/rseq/run_legacy_check.sh + +diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile +index 0d1947c0d623..0293a2f17f51 100644 +--- a/tools/testing/selftests/rseq/Makefile ++++ b/tools/testing/selftests/rseq/Makefile +@@ -22,9 +22,10 @@ TEST_GEN_PROGS_EXTENDED = librseq.so \ + param_test_compare_twice \ + param_test_mm_cid \ + param_test_mm_cid_compare_twice \ +- syscall_errors_test ++ syscall_errors_test \ ++ legacy_check + +-TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh ++TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh run_legacy_check.sh + + TEST_FILES := settings + +diff --git a/tools/testing/selftests/rseq/legacy_check.c b/tools/testing/selftests/rseq/legacy_check.c +new file mode 100644 +index 000000000000..3f7de4e28303 +--- /dev/null ++++
b/tools/testing/selftests/rseq/legacy_check.c +@@ -0,0 +1,65 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#ifndef _GNU_SOURCE ++#define _GNU_SOURCE ++#endif ++ ++#include ++#include ++#include ++#include ++ ++#include "rseq.h" ++ ++#include "../kselftest_harness.h" ++ ++FIXTURE(legacy) ++{ ++}; ++ ++static int cpu_id_in_sigfn = -1; ++ ++static void sigfn(int sig) ++{ ++ struct rseq_abi *rs = rseq_get_abi(); ++ ++ cpu_id_in_sigfn = rs->cpu_id_start; ++} ++ ++FIXTURE_SETUP(legacy) ++{ ++ int res = __rseq_register_current_thread(true, true); ++ ++ switch (res) { ++ case -ENOSYS: ++ SKIP(return, "RSEQ not enabled\n"); ++ case -EBUSY: ++ SKIP(return, "GLIBC owns RSEQ. Disable GLIBC RSEQ registration\n"); ++ default: ++ ASSERT_EQ(res, 0); ++ } ++ ++ ASSERT_NE(signal(SIGUSR1, sigfn), SIG_ERR); ++} ++ ++FIXTURE_TEARDOWN(legacy) ++{ ++} ++ ++TEST_F(legacy, legacy_test) ++{ ++ struct rseq_abi *rs = rseq_get_abi(); ++ ++ ASSERT_NE(rs, NULL); ++ ++ /* Overwrite rs::cpu_id_start */ ++ rs->cpu_id_start = -1; ++ sleep(1); ++ ASSERT_NE(rs->cpu_id_start, -1); ++ ++ rs->cpu_id_start = -1; ++ ASSERT_EQ(raise(SIGUSR1), 0); ++ ASSERT_NE(rs->cpu_id_start, -1); ++ ASSERT_NE(cpu_id_in_sigfn, -1); ++} ++ ++TEST_HARNESS_MAIN +diff --git a/tools/testing/selftests/rseq/run_legacy_check.sh b/tools/testing/selftests/rseq/run_legacy_check.sh +new file mode 100755 +index 000000000000..5577b46ea092 +--- /dev/null ++++ b/tools/testing/selftests/rseq/run_legacy_check.sh +@@ -0,0 +1,4 @@ ++#!/bin/bash ++# SPDX-License-Identifier: GPL-2.0 ++ ++GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ./legacy_check +-- +2.54.0 + diff --git a/queue-7.0/series b/queue-7.0/series index 977dae9096..7046576531 100644 --- a/queue-7.0/series +++ b/queue-7.0/series @@ -89,3 +89,22 @@ spi-tegra20-sflash-fix-controller-deregistration.patch spi-s3c64xx-fix-null-deref-on-driver-unbind.patch staging-rtl8723bs-os_dep-avoid-null-pointer-dereference-in-rtw_cbuf_alloc.patch 
staging-vme_user-fix-root-device-leak-on-init-failure.patch +fanotify-fix-false-positive-on-permission-events.patch +kvm-arm64-fix-kvm_vcpu_initialized-macro-parameter.patch +mtd-spi-nor-debugfs-fix-out-of-bounds-read-in-spi_nor_params_show.patch +arm64-signal-preserve-por_el0-if-poe_context-is-missing.patch +mm-hugetlb_cma-round-up-per_node-before-logging-it.patch +loongarch-fix-sym_sigfunc_start-definition-for-32bit.patch +loongarch-kvm-compile-switch.s-directly-into-the-kernel.patch +net-rtnetlink-zero-ifla_vf_broadcast-to-avoid-stack-infoleak-in-rtnl_fill_vfinfo.patch +mptcp-pm-add_addr-rtx-skip-inactive-subflows.patch +perf-x86-intel-improve-validation-and-configuration-of-acr-masks.patch +selftests-rseq-don-t-run-tests-with-runner-scripts-outside-of-the-scripts.patch +rseq-set-rseq-cpu_id_start-to-0-on-unregistration.patch +rseq-protect-rseq_reset-against-interrupts.patch +rseq-don-t-advertise-time-slice-extensions-if-disabled.patch +selftests-rseq-make-registration-flexible-for-legacy-and-optimized-mode.patch +selftests-rseq-skip-tests-if-time-slice-extensions-are-not-available.patch +selftests-rseq-validate-legacy-behavior.patch +selftests-rseq-expand-for-optimized-rseq-abi-v2.patch +accel-ivpu-disallow-re-exporting-imported-gem-objects.patch