From: Sasha Levin Date: Tue, 19 May 2026 19:24:46 +0000 (-0400) Subject: Fixes for all trees X-Git-Tag: v6.6.141~56 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=28d8111dae0db498dc1a9190006e927bcbcb6f8c;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for all trees Signed-off-by: Sasha Levin --- diff --git a/queue-6.18/hid-core-fix-size_t-specifier-in-hid_report_raw_even.patch b/queue-6.18/hid-core-fix-size_t-specifier-in-hid_report_raw_even.patch new file mode 100644 index 0000000000..7b432ea896 --- /dev/null +++ b/queue-6.18/hid-core-fix-size_t-specifier-in-hid_report_raw_even.patch @@ -0,0 +1,65 @@ +From 8cbffa93ac074bdc433f53330bd64120734b5990 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 17 May 2026 13:51:01 +0900 +Subject: HID: core: Fix size_t specifier in hid_report_raw_event() + +From: Nathan Chancellor + +[ Upstream commit 4d3a2a466b8d68d852a1f3bbf11204b718428dc4 ] + +When building for 32-bit platforms, for which 'size_t' is +'unsigned int', there are warnings around using the incorrect format +specifier to print bsize in hid_report_raw_event(): + + drivers/hid/hid-core.c:2054:29: error: format specifies type 'long' but the argument has type 'size_t' (aka 'unsigned int') [-Werror,-Wformat] + 2053 | hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %ld)\n", + | ~~~ + | %zu + 2054 | report->id, csize, bsize); + | ^~~~~ + drivers/hid/hid-core.c:2076:29: error: format specifies type 'long' but the argument has type 'size_t' (aka 'unsigned int') [-Werror,-Wformat] + 2075 | hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %ld)\n", + | ~~~ + | %zu + 2076 | report->id, rsize, bsize); + | ^~~~~ + +Use the proper 'size_t' format specifier, '%zu', to clear up the +warnings. 
+ +Cc: stable@vger.kernel.org +Fixes: 2c85c61d1332 ("HID: pass the buffer size to hid_report_raw_event") +Reported-by: Miguel Ojeda +Closes: https://lore.kernel.org/20260516020430.110135-1-ojeda@kernel.org/ +Signed-off-by: Nathan Chancellor +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index e67ea3a7d1395..6a1600af30e20 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -2046,7 +2046,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * + return 0; + + if (unlikely(bsize < csize)) { +- hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %ld)\n", ++ hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %zu)\n", + report->id, csize, bsize); + return -EINVAL; + } +@@ -2068,7 +2068,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * + rsize = max_buffer_size; + + if (bsize < rsize) { +- hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %ld)\n", ++ hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %zu)\n", + report->id, rsize, bsize); + return -EINVAL; + } +-- +2.53.0 + diff --git a/queue-6.18/series b/queue-6.18/series index c71be7aa3e..49b876ceae 100644 --- a/queue-6.18/series +++ b/queue-6.18/series @@ -895,3 +895,4 @@ kvm-x86-fix-xen-hypercall-tracepoint-argument-assignment.patch bluetooth-btmtk-accept-too-short-wmt-func_ctrl-events.patch hid-pass-the-buffer-size-to-hid_report_raw_event.patch hid-core-introduce-hid_safe_input_report.patch +hid-core-fix-size_t-specifier-in-hid_report_raw_even.patch diff --git a/queue-7.0/hid-core-fix-size_t-specifier-in-hid_report_raw_even.patch b/queue-7.0/hid-core-fix-size_t-specifier-in-hid_report_raw_even.patch new file mode 100644 index 0000000000..36d71dbfeb --- /dev/null +++ 
b/queue-7.0/hid-core-fix-size_t-specifier-in-hid_report_raw_even.patch @@ -0,0 +1,65 @@ +From ec9d39727eb9fff1e20445c55b22b79a59a21b66 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 17 May 2026 13:51:01 +0900 +Subject: HID: core: Fix size_t specifier in hid_report_raw_event() + +From: Nathan Chancellor + +[ Upstream commit 4d3a2a466b8d68d852a1f3bbf11204b718428dc4 ] + +When building for 32-bit platforms, for which 'size_t' is +'unsigned int', there are warnings around using the incorrect format +specifier to print bsize in hid_report_raw_event(): + + drivers/hid/hid-core.c:2054:29: error: format specifies type 'long' but the argument has type 'size_t' (aka 'unsigned int') [-Werror,-Wformat] + 2053 | hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %ld)\n", + | ~~~ + | %zu + 2054 | report->id, csize, bsize); + | ^~~~~ + drivers/hid/hid-core.c:2076:29: error: format specifies type 'long' but the argument has type 'size_t' (aka 'unsigned int') [-Werror,-Wformat] + 2075 | hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %ld)\n", + | ~~~ + | %zu + 2076 | report->id, rsize, bsize); + | ^~~~~ + +Use the proper 'size_t' format specifier, '%zu', to clear up the +warnings. 
+ +Cc: stable@vger.kernel.org +Fixes: 2c85c61d1332 ("HID: pass the buffer size to hid_report_raw_event") +Reported-by: Miguel Ojeda +Closes: https://lore.kernel.org/20260516020430.110135-1-ojeda@kernel.org/ +Signed-off-by: Nathan Chancellor +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + drivers/hid/hid-core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index eaac6c84377e3..570884039d781 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -2046,7 +2046,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * + return 0; + + if (unlikely(bsize < csize)) { +- hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %ld)\n", ++ hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %zu)\n", + report->id, csize, bsize); + return -EINVAL; + } +@@ -2068,7 +2068,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * + rsize = max_buffer_size; + + if (bsize < rsize) { +- hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %ld)\n", ++ hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %zu)\n", + report->id, rsize, bsize); + return -EINVAL; + } +-- +2.53.0 + diff --git a/queue-7.0/rseq-implement-read-only-abi-enforcement-for-optimiz.patch b/queue-7.0/rseq-implement-read-only-abi-enforcement-for-optimiz.patch new file mode 100644 index 0000000000..35e89edbd0 --- /dev/null +++ b/queue-7.0/rseq-implement-read-only-abi-enforcement-for-optimiz.patch @@ -0,0 +1,230 @@ +From 920e91e897d3a29191a84c07bda9a1e14cbafee2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 May 2026 18:03:56 +0200 +Subject: rseq: Implement read only ABI enforcement for optimized RSEQ V2 mode + +From: Thomas Gleixner + +commit 82f572449cfe75f12ea985986da60e11f308f77d upstream. 
+ +The optimized RSEQ V2 mode requires that user space adheres to the ABI +specification and does not modify the read-only fields cpu_id_start, +cpu_id, node_id and mm_cid behind the kernel's back. + +While the kernel does not rely on these fields, the adherence to this is a +fundamental prerequisite to allow multiple entities, e.g. libraries, in an +application to utilize the full potential of RSEQ without stepping on each +other toes. + +Validate this adherence on every update of these fields. If the kernel +detects that user space modified the fields, the application is force +terminated. + +Fixes: d6200245c75e ("rseq: Allow registering RSEQ with slice extension") +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Thomas Gleixner +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Link: https://patch.msgid.link/20260428224427.845230956%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + include/linux/rseq_entry.h | 83 ++++++++++++++------------------------ + include/linux/rseq_types.h | 4 +- + kernel/rseq.c | 5 +-- + 3 files changed, 35 insertions(+), 57 deletions(-) + +diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h +index c67a3476e9dd6..413a3543fbe8e 100644 +--- a/include/linux/rseq_entry.h ++++ b/include/linux/rseq_entry.h +@@ -238,7 +238,6 @@ static __always_inline bool rseq_grant_slice_extension(bool work_pending) { retu + #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ + + bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); +-bool rseq_debug_validate_ids(struct task_struct *t); + + static __always_inline void rseq_note_user_irq_entry(void) + { +@@ -358,43 +357,6 @@ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, + return false; + } + +-/* +- * On debug kernels validate that user space did not mess with it if the +- * debug branch is enabled. 
+- */ +-bool rseq_debug_validate_ids(struct task_struct *t) +-{ +- struct rseq __user *rseq = t->rseq.usrptr; +- u32 cpu_id, uval, node_id; +- +- /* +- * On the first exit after registering the rseq region CPU ID is +- * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0! +- */ +- node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ? +- cpu_to_node(t->rseq.ids.cpu_id) : 0; +- +- scoped_user_read_access(rseq, efault) { +- unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault); +- if (cpu_id != t->rseq.ids.cpu_id) +- goto die; +- unsafe_get_user(uval, &rseq->cpu_id, efault); +- if (uval != cpu_id) +- goto die; +- unsafe_get_user(uval, &rseq->node_id, efault); +- if (uval != node_id) +- goto die; +- unsafe_get_user(uval, &rseq->mm_cid, efault); +- if (uval != t->rseq.ids.mm_cid) +- goto die; +- } +- return true; +-die: +- t->rseq.event.fatal = true; +-efault: +- return false; +-} +- + #endif /* RSEQ_BUILD_SLOW_PATH */ + + /* +@@ -504,20 +466,32 @@ rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long c + * faults in task context are fatal too. 
+ */ + static rseq_inline +-bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, +- u32 node_id, u64 *csaddr) ++bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, u64 *csaddr) + { + struct rseq __user *rseq = t->rseq.usrptr; + +- if (static_branch_unlikely(&rseq_debug_enabled)) { +- if (!rseq_debug_validate_ids(t)) +- return false; +- } +- + scoped_user_rw_access(rseq, efault) { ++ /* Validate the R/O fields for debug and optimized mode */ ++ if (static_branch_unlikely(&rseq_debug_enabled) || rseq_v2(t)) { ++ u32 cpu_id, uval; ++ ++ unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault); ++ if (cpu_id != t->rseq.ids.cpu_id) ++ goto die; ++ unsafe_get_user(uval, &rseq->cpu_id, efault); ++ if (uval != cpu_id) ++ goto die; ++ unsafe_get_user(uval, &rseq->node_id, efault); ++ if (uval != t->rseq.ids.node_id) ++ goto die; ++ unsafe_get_user(uval, &rseq->mm_cid, efault); ++ if (uval != t->rseq.ids.mm_cid) ++ goto die; ++ } ++ + unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault); + unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault); +- unsafe_put_user(node_id, &rseq->node_id, efault); ++ unsafe_put_user(ids->node_id, &rseq->node_id, efault); + unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault); + if (csaddr) + unsafe_get_user(*csaddr, &rseq->rseq_cs, efault); +@@ -529,10 +503,13 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, + + rseq_slice_clear_grant(t); + /* Cache the new values */ +- t->rseq.ids.cpu_cid = ids->cpu_cid; ++ t->rseq.ids = *ids; + rseq_stat_inc(rseq_stats.ids); + rseq_trace_update(t, ids); + return true; ++ ++die: ++ t->rseq.event.fatal = true; + efault: + return false; + } +@@ -542,11 +519,11 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, + * is in a critical section. 
+ */ + static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs, +- struct rseq_ids *ids, u32 node_id) ++ struct rseq_ids *ids) + { + u64 csaddr; + +- if (!rseq_set_ids_get_csaddr(t, ids, node_id, &csaddr)) ++ if (!rseq_set_ids_get_csaddr(t, ids, &csaddr)) + return false; + + /* +@@ -649,12 +626,12 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t + } + + struct rseq_ids ids = { +- .cpu_id = task_cpu(t), +- .mm_cid = task_mm_cid(t), ++ .cpu_id = task_cpu(t), ++ .mm_cid = task_mm_cid(t), ++ .node_id = cpu_to_node(ids.cpu_id), + }; +- u32 node_id = cpu_to_node(ids.cpu_id); + +- return rseq_update_usr(t, regs, &ids, node_id); ++ return rseq_update_usr(t, regs, &ids); + efault: + return false; + } +diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h +index a469c1870849c..85739a63e85e6 100644 +--- a/include/linux/rseq_types.h ++++ b/include/linux/rseq_types.h +@@ -66,8 +66,9 @@ struct rseq_event { + * compiler emit a single compare on 64-bit + * @cpu_id: The CPU ID which was written last to user space + * @mm_cid: The MM CID which was written last to user space ++ * @node_id: The node ID which was written last to user space + * +- * @cpu_id and @mm_cid are updated when the data is written to user space. ++ * @cpu_id, @mm_cid and @node_id are updated when the data is written to user space. 
+ */ + struct rseq_ids { + union { +@@ -77,6 +78,7 @@ struct rseq_ids { + u32 mm_cid; + }; + }; ++ u32 node_id; + }; + + /** +diff --git a/kernel/rseq.c b/kernel/rseq.c +index aa25753ea1350..101612027f6a3 100644 +--- a/kernel/rseq.c ++++ b/kernel/rseq.c +@@ -263,7 +263,6 @@ static void rseq_slowpath_update_usr(struct pt_regs *regs) + }; + struct task_struct *t = current; + struct rseq_ids ids; +- u32 node_id; + bool event; + + if (unlikely(t->flags & PF_EXITING)) +@@ -299,9 +298,9 @@ static void rseq_slowpath_update_usr(struct pt_regs *regs) + if (!event) + return; + +- node_id = cpu_to_node(ids.cpu_id); ++ ids.node_id = cpu_to_node(ids.cpu_id); + +- if (unlikely(!rseq_update_usr(t, regs, &ids, node_id))) { ++ if (unlikely(!rseq_update_usr(t, regs, &ids))) { + /* + * Clear the errors just in case this might survive magically, but + * leave the rest intact. +-- +2.53.0 + diff --git a/queue-7.0/rseq-reenable-performance-optimizations-conditionall.patch b/queue-7.0/rseq-reenable-performance-optimizations-conditionall.patch new file mode 100644 index 0000000000..8adfc587f4 --- /dev/null +++ b/queue-7.0/rseq-reenable-performance-optimizations-conditionall.patch @@ -0,0 +1,382 @@ +From b41b0f9acb130997f82822302d77fffd914be511 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 May 2026 18:04:01 +0200 +Subject: rseq: Reenable performance optimizations conditionally + +From: Thomas Gleixner + +commit 99428157dcf32fdac97355aa1cc1364dbc9e073c upstream. + +Due to the incompatibility with TCMalloc the RSEQ optimizations and +extended features (time slice extensions) have been disabled and made +run-time conditional. + +The original RSEQ implementation, which TCMalloc depends on, registers a 32 +byte region (ORIG_RSEG_SIZE). This region has a 32 byte alignment +requirement. + +The extension safe newer variant exposes the kernel RSEQ feature size via +getauxval(AT_RSEQ_FEATURE_SIZE) and the alignment requirement via +getauxval(AT_RSEQ_ALIGN). 
The alignment requirement is that the registered +RSEQ region is aligned to the next power of two of the feature size. The +kernel currently has a feature size of 33 bytes, which means the alignment +requirement is 64 bytes. + +The TCMalloc RSEQ region is embedded into a cache line aligned data +structure starting at offset 32 bytes so that bytes 28-31 and the +cpu_id_start field at bytes 32-35 form a 64-bit little endian pointer with +the top-most bit (63) set to check whether the kernel has overwritten +cpu_id_start with an actual CPU id value, which is guaranteed to not have +the top most bit set. + +As this is part of their performance tuned magic, it's a pretty safe +assumption, that TCMalloc won't use a larger RSEQ size. + +This allows the kernel to declare that registrations with a size greater +than the original size of 32 bytes, which is the case since time slice +extensions got introduced, as RSEQ ABI v2 with the following differences to +the original behaviour: + + 1) Unconditional updates of the user read only fields (CPU, node, MMCID) + are removed. Those fields are only updated on registration, task + migration and MMCID changes. + + 2) Unconditional evaluation of the critical section pointer is + removed. It's only evaluated when user space was interrupted and was + scheduled out or before delivering a signal in the interrupted + context. + + 3) The read/only requirement of the ID fields is enforced. When the + kernel detects that userspace manipulated the fields, the process is + terminated. This ensures that multiple entities (libraries) can + utilize RSEQ without interfering. + + 4) Today's extended RSEQ feature (time slice extensions) and future + extensions are only enabled in the v2 enabled mode. + +Registrations with the original size of 32 bytes operate in backwards +compatible legacy mode without performance improvements and extended +features. 
+ +Unfortunately that also affects users of older GLIBC versions which +register the original size of 32 bytes and do not evaluate the kernel +required size in the auxiliary vector AT_RSEQ_FEATURE_SIZE. + +That's the result of the lack of enforcement in the original implementation +and the unwillingness of a single entity to cooperate with the larger +ecosystem for many years. + +Implement the required registration changes by restructuring the spaghetti +code and adding the size/version check. Also add documentation about the +differences of legacy and optimized RSEQ V2 mode. + +Thanks to Mathieu for pointing out the ORIG_RSEQ_SIZE constraints! + +Fixes: d6200245c75e ("rseq: Allow registering RSEQ with slice extension") +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Thomas Gleixner +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Link: https://patch.msgid.link/20260428224427.927160119%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + Documentation/userspace-api/rseq.rst | 94 ++++++++++++++++- + kernel/rseq.c | 144 ++++++++++++++++----------- + 2 files changed, 178 insertions(+), 60 deletions(-) + +diff --git a/Documentation/userspace-api/rseq.rst b/Documentation/userspace-api/rseq.rst +index 3cd27a3c7c7e5..8549a6c61531c 100644 +--- a/Documentation/userspace-api/rseq.rst ++++ b/Documentation/userspace-api/rseq.rst +@@ -24,6 +24,97 @@ Quick access to CPU number, node ID + Allows to implement per CPU data efficiently. Documentation is in code and + selftests. :( + ++Optimized RSEQ V2 ++----------------- ++ ++On architectures which utilize the generic entry code and generic TIF bits ++the kernel supports runtime optimizations for RSEQ, which also enable ++enhanced features like scheduler time slice extensions. ++ ++To enable them a task has to register the RSEQ region with at least the ++length advertised by getauxval(AT_RSEQ_FEATURE_SIZE). 
++ ++If existing binaries register with RSEQ_ORIG_SIZE (32 bytes), the kernel ++keeps the legacy low performance mode enabled to fulfil the expectations ++of existing users regarding the original RSEQ implementation behaviour. ++ ++The following table documents the ABI and behavioral guarantees of the ++legacy and the optimized V2 mode. ++ ++.. list-table:: RSEQ modes ++ :header-rows: 1 ++ ++ * - Nr ++ - What ++ ++ - Legacy ++ - Optimized V2 ++ ++ * - 1 ++ - The cpu_id_start, cpu_id, node_id and mm_cid fields (User mode read ++ only) ++ .. Legacy ++ - Updated by the kernel unconditionally after each context switch and ++ before signal delivery ++ .. Optimized V2 ++ - Updated by the kernel if and only if they change, i.e. if the task ++ is migrated or mm_cid changes ++ ++ * - 2 ++ - The rseq_cs critical section field ++ .. Legacy ++ - Evaluated and handled unconditionally after each context switch and ++ before signal delivery ++ .. Optimized V2 ++ - Evaluated and handled conditionally only when user space was ++ interrupted and was scheduled out or before delivering a signal in ++ the interrupted context. ++ ++ * - 3 ++ - Read only fields ++ .. Legacy ++ - No strict enforcement except in debug mode ++ .. Optimized V2 ++ - Strict enforcement ++ ++ * - 4 ++ - membarrier(...RSEQ) ++ .. Legacy ++ - All running threads of the process are interrupted and the ID fields ++ are rewritten and eventually active critical sections are aborted ++ before they return to user space. All threads which are scheduled ++ out whether voluntary or not are covered by #1/#2 above. ++ .. Optimized V2 ++ - All running threads of the process are interrupted and eventually ++ active critical sections are aborted before these threads return to ++ user space. The ID fields are only updated if changed as a ++ consequence of the interrupt. All threads which are scheduled out ++ whether voluntary or not are covered by #1/#2 above. ++ ++ * - 5 ++ - Time slice extensions ++ .. 
Legacy ++ - Not supported ++ .. Optimized V2 ++ - Supported ++ ++The legacy mode is obviously less performant as it does unconditional ++updates and critical section checks even if not strictly required by the ++ABI contract. That can't be changed anymore as some users depend on that ++observed behavior, which in turn enables them to violate the ABI and ++overwrite the cpu_id_start field for their own purposes. This is obviously ++discouraged as it renders RSEQ incompatible with the intended usage and ++breaks the expectation of other libraries in the same application. ++ ++The ABI compliant optimized v2 mode, which respects the read only fields, ++does not require unconditional updates and therefore is way more ++performant. The kernel validates the read only fields for compliance. If ++user space modifies them, the process is killed. Compliant usage allows ++multiple libraries in the same application to benefit from the RSEQ ++functionality without disturbing each other. The ABI compliant optimized v2 ++mode also enables extended RSEQ features like time slice extensions. ++ ++ + Scheduler time slice extensions + ------------------------------- + +@@ -37,7 +128,8 @@ The prerequisites for this functionality are: + + * Enabled at boot time (default is enabled) + +- * A rseq userspace pointer has been registered for the thread ++ * A rseq userspace pointer has been registered for the thread in ++ optimized V2 mode + + The thread has to enable the functionality via prctl(2):: + +diff --git a/kernel/rseq.c b/kernel/rseq.c +index 101612027f6a3..e75e3a5e312c8 100644 +--- a/kernel/rseq.c ++++ b/kernel/rseq.c +@@ -412,70 +412,23 @@ static bool rseq_reset_ids(void) + /* The original rseq structure size (including padding) is 32 bytes. */ + #define ORIG_RSEQ_SIZE 32 + +-/* +- * sys_rseq - setup restartable sequences for caller thread. 
+- */ +-SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) ++static long rseq_register(struct rseq __user * rseq, u32 rseq_len, int flags, u32 sig) + { + u32 rseqfl = 0; + u8 version = 1; + +- if (flags & RSEQ_FLAG_UNREGISTER) { +- if (flags & ~RSEQ_FLAG_UNREGISTER) +- return -EINVAL; +- /* Unregister rseq for current thread. */ +- if (current->rseq.usrptr != rseq || !current->rseq.usrptr) +- return -EINVAL; +- if (rseq_len != current->rseq.len) +- return -EINVAL; +- if (current->rseq.sig != sig) +- return -EPERM; +- if (!rseq_reset_ids()) +- return -EFAULT; +- rseq_reset(current); +- return 0; +- } +- +- if (unlikely(flags & ~(RSEQ_FLAG_SLICE_EXT_DEFAULT_ON))) +- return -EINVAL; +- +- if (current->rseq.usrptr) { +- /* +- * If rseq is already registered, check whether +- * the provided address differs from the prior +- * one. +- */ +- if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len) +- return -EINVAL; +- if (current->rseq.sig != sig) +- return -EPERM; +- /* Already registered. */ +- return -EBUSY; +- } +- +- /* +- * If there was no rseq previously registered, ensure the provided rseq +- * is properly aligned, as communcated to user-space through the ELF +- * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq +- * size, the required alignment is the original struct rseq alignment. +- * +- * The rseq_len is required to be greater or equal to the original rseq +- * size. In order to be valid, rseq_len is either the original rseq size, +- * or large enough to contain all supported fields, as communicated to +- * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. 
+- */ +- if (rseq_len < ORIG_RSEQ_SIZE || +- (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) || +- (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) || +- rseq_len < offsetof(struct rseq, end)))) +- return -EINVAL; + if (!access_ok(rseq, rseq_len)) + return -EFAULT; + + /* +- * The version check effectivly disables time slice extensions until the +- * RSEQ ABI V2 registration are implemented. ++ * Architectures, which use the generic IRQ entry code (at least) enable ++ * registrations with a size greater than the original v1 fixed sized ++ * @rseq_len, which has been validated already to utilize the optimized ++ * v2 ABI mode which also enables extended RSEQ features beyond MMCID. + */ ++ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY) && rseq_len > ORIG_RSEQ_SIZE) ++ version = 2; ++ + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION) && version > 1) { + if (rseq_slice_extension_enabled()) { + rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; +@@ -523,11 +476,10 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 + #endif + + /* +- * If rseq was previously inactive, and has just been +- * registered, ensure the cpu_id_start and cpu_id fields +- * are updated before returning to user-space. ++ * Ensure the cpu_id_start and cpu_id fields are updated before ++ * returning to user-space. 
+ */ +- current->rseq.event.has_rseq = true; ++ current->rseq.event.has_rseq = version; + rseq_force_update(); + return 0; + +@@ -535,6 +487,80 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 + return -EFAULT; + } + ++static long rseq_unregister(struct rseq __user * rseq, u32 rseq_len, int flags, u32 sig) ++{ ++ if (flags & ~RSEQ_FLAG_UNREGISTER) ++ return -EINVAL; ++ if (current->rseq.usrptr != rseq || !current->rseq.usrptr) ++ return -EINVAL; ++ if (rseq_len != current->rseq.len) ++ return -EINVAL; ++ if (current->rseq.sig != sig) ++ return -EPERM; ++ if (!rseq_reset_ids()) ++ return -EFAULT; ++ rseq_reset(current); ++ return 0; ++} ++ ++static long rseq_reregister(struct rseq __user * rseq, u32 rseq_len, u32 sig) ++{ ++ /* ++ * If rseq is already registered, check whether the provided address ++ * differs from the prior one. ++ */ ++ if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len) ++ return -EINVAL; ++ if (current->rseq.sig != sig) ++ return -EPERM; ++ /* Already registered. */ ++ return -EBUSY; ++} ++ ++static bool rseq_length_valid(struct rseq __user *rseq, unsigned int rseq_len) ++{ ++ /* ++ * Ensure the provided rseq is properly aligned, as communicated to ++ * user-space through the ELF auxiliary vector AT_RSEQ_ALIGN. If ++ * rseq_len is the original rseq size, the required alignment is the ++ * original struct rseq alignment. ++ * ++ * In order to be valid, rseq_len is either the original rseq size, or ++ * large enough to contain all supported fields, as communicated to ++ * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. 
++ */ ++ if (rseq_len < ORIG_RSEQ_SIZE) ++ return false; ++ ++ if (rseq_len == ORIG_RSEQ_SIZE) ++ return IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE); ++ ++ return IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) && ++ rseq_len >= offsetof(struct rseq, end); ++} ++ ++#define RSEQ_FLAGS_SUPPORTED (RSEQ_FLAG_SLICE_EXT_DEFAULT_ON) ++ ++/* ++ * sys_rseq - Register or unregister restartable sequences for the caller thread. ++ */ ++SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) ++{ ++ if (flags & RSEQ_FLAG_UNREGISTER) ++ return rseq_unregister(rseq, rseq_len, flags, sig); ++ ++ if (unlikely(flags & ~RSEQ_FLAGS_SUPPORTED)) ++ return -EINVAL; ++ ++ if (current->rseq.usrptr) ++ return rseq_reregister(rseq, rseq_len, sig); ++ ++ if (!rseq_length_valid(rseq, rseq_len)) ++ return -EINVAL; ++ ++ return rseq_register(rseq, rseq_len, flags, sig); ++} ++ + #ifdef CONFIG_RSEQ_SLICE_EXTENSION + struct slice_timer { + struct hrtimer timer; +-- +2.53.0 + diff --git a/queue-7.0/rseq-revert-to-historical-performance-killing-behavi.patch b/queue-7.0/rseq-revert-to-historical-performance-killing-behavi.patch new file mode 100644 index 0000000000..fd0778007a --- /dev/null +++ b/queue-7.0/rseq-revert-to-historical-performance-killing-behavi.patch @@ -0,0 +1,399 @@ +From 1c42a535bb66abda5132c5b764090133a3ff1b82 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 May 2026 18:03:50 +0200 +Subject: rseq: Revert to historical performance killing behaviour + +From: Thomas Gleixner + +commit b9eac6a9d93c952c4b7775a24d5c7a1bbf4c3c00 upstream. + +The recent RSEQ optimization work broke the TCMalloc abuse of the RSEQ ABI +as it not longer unconditionally updates the CPU, node, mm_cid fields, +which are documented as read only for user space. 
Due to the observed +behavior of the kernel it was possible for TCMalloc to overwrite the +cpu_id_start field for their own purposes and rely on the kernel to update +it unconditionally after each context switch and before signal delivery. + +The RSEQ ABI only guarantees that these fields are updated when the data +changes, i.e. the task is migrated or the MMCID of the task changes due to +switching from or to per CPU ownership mode. + +The optimization work eliminated the unconditional updates and reduced them +to the documented ABI guarantees, which results in a massive performance +win for syscall, scheduling heavy work loads, which in turn breaks the +TCMalloc expectations. + +There have been several options discussed to restore the TCMalloc +functionality while preserving the optimization benefits. They all end up +in a series of hard to maintain workarounds, which in the worst case +introduce overhead for everyone, e.g. in the scheduler. + +The requirements of TCMalloc and the optimization work are diametrically opposed and +the required workarounds are a maintenance burden. They end up as fragile +constructs, which are blocking further optimization work and are pretty +much guaranteed to cause more subtle issues down the road. + +The optimization work heavily depends on the generic entry code, which is +not used by all architectures yet. So the rework preserved the original +mechanism mostly unmodified to keep the support for architectures, which +handle rseq in their own exit to user space loop. That code is currently +optimized out by the compiler on architectures which use the generic entry +code. + +This allows to revert back to the original behaviour by replacing the +compile time constant conditions with a runtime condition where required, +which disables the optimization and the dependent time slice extension +feature until the run-time condition can be enabled in the RSEQ +registration code on a per task basis again. 
+ +The following changes are required to restore the original behavior, which +makes TCMalloc work again: + + 1) Replace the compile time constant conditionals with runtime + conditionals where appropriate to prevent the compiler from optimizing + the legacy mode out + + 2) Enforce unconditional update of IDs on context switch for the + non-optimized v1 mode + + 3) Enforce update of IDs in the pre signal delivery path for the + non-optimized v1 mode + + 4) Enforce update of IDs in the membarrier(RSEQ) IPI for the + non-optimized v1 mode + + 5) Make time slice and future extensions depend on optimized v2 mode + +This brings back the full performance problems, but preserves the v2 +optimization code and for generic entry code using architectures also the +TIF_RSEQ optimization which avoids a full evaluation of the exit to user +mode loop in many cases. + +Fixes: 566d8015f7ee ("rseq: Avoid CPU/MM CID updates when no event pending") +Reported-by: Mathias Stearn +Signed-off-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Thomas Gleixner +Reviewed-by: Dmitry Vyukov +Tested-by: Dmitry Vyukov +Closes: https://lore.kernel.org/CAHnCjA25b+nO2n5CeifknSKHssJpPrjnf+dtr7UgzRw4Zgu=oA@mail.gmail.com +Link: https://patch.msgid.link/20260428224427.517051752%40kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + include/linux/rseq.h | 35 ++++++++++++++++++++++----------- + include/linux/rseq_entry.h | 39 +++++++++++++++++++++++++++---------- + include/linux/rseq_types.h | 9 ++++++++- + kernel/rseq.c | 40 +++++++++++++++++++++++++++++++------- + kernel/sched/membarrier.c | 11 ++++++++++- + 5 files changed, 104 insertions(+), 30 deletions(-) + +diff --git a/include/linux/rseq.h b/include/linux/rseq.h +index f446909551df0..7ef79b25e714b 100644 +--- a/include/linux/rseq.h ++++ b/include/linux/rseq.h +@@ -9,6 +9,11 @@ + + void __rseq_handle_slowpath(struct pt_regs *regs); + ++static __always_inline bool rseq_v2(struct task_struct *t) ++{ ++ 
return IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY) && likely(t->rseq.event.has_rseq > 1); ++} ++ + /* Invoked from resume_user_mode_work() */ + static inline void rseq_handle_slowpath(struct pt_regs *regs) + { +@@ -16,8 +21,7 @@ static inline void rseq_handle_slowpath(struct pt_regs *regs) + if (current->rseq.event.slowpath) + __rseq_handle_slowpath(regs); + } else { +- /* '&' is intentional to spare one conditional branch */ +- if (current->rseq.event.sched_switch & current->rseq.event.has_rseq) ++ if (current->rseq.event.sched_switch && current->rseq.event.has_rseq) + __rseq_handle_slowpath(regs); + } + } +@@ -30,9 +34,9 @@ void __rseq_signal_deliver(int sig, struct pt_regs *regs); + */ + static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) + { +- if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { +- /* '&' is intentional to spare one conditional branch */ +- if (current->rseq.event.has_rseq & current->rseq.event.user_irq) ++ if (rseq_v2(current)) { ++ /* has_rseq is implied in rseq_v2() */ ++ if (current->rseq.event.user_irq) + __rseq_signal_deliver(ksig->sig, regs); + } else { + if (current->rseq.event.has_rseq) +@@ -50,15 +54,22 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t) + { + struct rseq_event *ev = &t->rseq.event; + +- if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { ++ /* ++ * Only apply the user_irq optimization for RSEQ ABI V2 registrations. ++ * Legacy users like TCMalloc rely on the original ABI V1 behaviour ++ * which updates IDs on every context swtich. ++ */ ++ if (rseq_v2(t)) { + /* +- * Avoid a boat load of conditionals by using simple logic +- * to determine whether NOTIFY_RESUME needs to be raised. ++ * Avoid a boat load of conditionals by using simple logic to ++ * determine whether TIF_NOTIFY_RESUME or TIF_RSEQ needs to be ++ * raised. + * +- * It's required when the CPU or MM CID has changed or +- * the entry was from user space. 
++ * It's required when the CPU or MM CID has changed or the entry ++ * was via interrupt from user space. ev->has_rseq does not have ++ * to be evaluated here because rseq_v2() implies has_rseq. + */ +- bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq; ++ bool raise = ev->user_irq | ev->ids_changed; + + if (raise) { + ev->sched_switch = true; +@@ -66,6 +77,7 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t) + } + } else { + if (ev->has_rseq) { ++ t->rseq.event.ids_changed = true; + t->rseq.event.sched_switch = true; + rseq_raise_notify_resume(t); + } +@@ -161,6 +173,7 @@ static inline unsigned int rseq_alloc_align(void) + } + + #else /* CONFIG_RSEQ */ ++static inline bool rseq_v2(struct task_struct *t) { return false; } + static inline void rseq_handle_slowpath(struct pt_regs *regs) { } + static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } + static inline void rseq_sched_switch_event(struct task_struct *t) { } +diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h +index c6831c93cd6ee..c67a3476e9dd6 100644 +--- a/include/linux/rseq_entry.h ++++ b/include/linux/rseq_entry.h +@@ -110,6 +110,20 @@ static __always_inline void rseq_slice_clear_grant(struct task_struct *t) + t->rseq.slice.state.granted = false; + } + ++/* ++ * Open coded, so it can be invoked within a user access region. ++ * ++ * This clears the user space state of the time slice extensions field only when ++ * the task has registered the optimized RSEQ_ABI V2. Some legacy registrations, ++ * e.g. TCMalloc, have conflicting non-ABI fields in struct RSEQ, which would be ++ * overwritten by an unconditional write. 
++ */ ++#define rseq_slice_clear_user(rseq, efault) \ ++do { \ ++ if (rseq_slice_extension_enabled()) \ ++ unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); \ ++} while (0) ++ + static __always_inline bool rseq_grant_slice_extension(bool work_pending) + { + struct task_struct *curr = current; +@@ -220,6 +234,7 @@ static __always_inline bool rseq_slice_extension_enabled(void) { return false; } + static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; } + static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { } + static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; } ++#define rseq_slice_clear_user(rseq, efault) do { } while (0) + #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ + + bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); +@@ -507,11 +522,9 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, + if (csaddr) + unsafe_get_user(*csaddr, &rseq->rseq_cs, efault); + +- /* Open coded, so it's in the same user access region */ +- if (rseq_slice_extension_enabled()) { +- /* Unconditionally clear it, no point in conditionals */ +- unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); +- } ++ /* RSEQ ABI V2 only operations */ ++ if (rseq_v2(t)) ++ rseq_slice_clear_user(rseq, efault); + } + + rseq_slice_clear_grant(t); +@@ -602,6 +615,14 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t + * interrupts disabled + */ + guard(pagefault)(); ++ /* ++ * This optimization is only valid when the task registered for the ++ * optimized RSEQ_ABI_V2 variant. Some legacy users rely on the original ++ * RSEQ implementation behaviour which unconditionally updated the IDs. ++ * rseq_sched_switch_event() ensures that legacy registrations always ++ * have both sched_switch and ids_changed set, which is compatible with ++ * the historical TIF_NOTIFY_RESUME behaviour. 
++ */ + if (likely(!t->rseq.event.ids_changed)) { + struct rseq __user *rseq = t->rseq.usrptr; + /* +@@ -613,11 +634,9 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t + scoped_user_rw_access(rseq, efault) { + unsafe_get_user(csaddr, &rseq->rseq_cs, efault); + +- /* Open coded, so it's in the same user access region */ +- if (rseq_slice_extension_enabled()) { +- /* Unconditionally clear it, no point in conditionals */ +- unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); +- } ++ /* RSEQ ABI V2 only operations */ ++ if (rseq_v2(t)) ++ rseq_slice_clear_user(rseq, efault); + } + + rseq_slice_clear_grant(t); +diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h +index 0b42045988db0..a469c1870849c 100644 +--- a/include/linux/rseq_types.h ++++ b/include/linux/rseq_types.h +@@ -9,6 +9,12 @@ + #ifdef CONFIG_RSEQ + struct rseq; + ++/* ++ * rseq_event::has_rseq contains the ABI version number so preserving it ++ * in AND operations requires a mask. ++ */ ++#define RSEQ_HAS_RSEQ_VERSION_MASK 0xff ++ + /** + * struct rseq_event - Storage for rseq related event management + * @all: Compound to initialize and clear the data efficiently +@@ -17,7 +23,8 @@ struct rseq; + * exit to user + * @ids_changed: Indicator that IDs need to be updated + * @user_irq: True on interrupt entry from user mode +- * @has_rseq: True if the task has a rseq pointer installed ++ * @has_rseq: Greater than 0 if the task has a rseq pointer installed. 
++ * Contains the RSEQ version number + * @error: Compound error code for the slow path to analyze + * @fatal: User space data corrupted or invalid + * @slowpath: Indicator that slow path processing via TIF_NOTIFY_RESUME +diff --git a/kernel/rseq.c b/kernel/rseq.c +index 586f58f652c6e..aa25753ea1350 100644 +--- a/kernel/rseq.c ++++ b/kernel/rseq.c +@@ -253,11 +253,14 @@ static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs) + static void rseq_slowpath_update_usr(struct pt_regs *regs) + { + /* +- * Preserve rseq state and user_irq state. The generic entry code +- * clears user_irq on the way out, the non-generic entry +- * architectures are not having user_irq. ++ * Preserve has_rseq and user_irq state. The generic entry code clears ++ * user_irq on the way out, the non-generic entry architectures are not ++ * setting user_irq. + */ +- const struct rseq_event evt_mask = { .has_rseq = true, .user_irq = true, }; ++ const struct rseq_event evt_mask = { ++ .has_rseq = RSEQ_HAS_RSEQ_VERSION_MASK, ++ .user_irq = true, ++ }; + struct task_struct *t = current; + struct rseq_ids ids; + u32 node_id; +@@ -330,8 +333,9 @@ void __rseq_handle_slowpath(struct pt_regs *regs) + void __rseq_signal_deliver(int sig, struct pt_regs *regs) + { + rseq_stat_inc(rseq_stats.signal); ++ + /* +- * Don't update IDs, they are handled on exit to user if ++ * Don't update IDs yet, they are handled on exit to user if + * necessary. The important thing is to abort a critical section of + * the interrupted context as after this point the instruction + * pointer in @regs points to the signal handler. +@@ -344,6 +348,13 @@ void __rseq_signal_deliver(int sig, struct pt_regs *regs) + current->rseq.event.error = 0; + force_sigsegv(sig); + } ++ ++ /* ++ * In legacy mode, force the update of IDs before returning to user ++ * space to stay compatible. 
++ */ ++ if (!rseq_v2(current)) ++ rseq_force_update(); + } + + /* +@@ -408,6 +419,7 @@ static bool rseq_reset_ids(void) + SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) + { + u32 rseqfl = 0; ++ u8 version = 1; + + if (flags & RSEQ_FLAG_UNREGISTER) { + if (flags & ~RSEQ_FLAG_UNREGISTER) +@@ -461,7 +473,11 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 + if (!access_ok(rseq, rseq_len)) + return -EFAULT; + +- if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) { ++ /* ++ * The version check effectivly disables time slice extensions until the ++ * RSEQ ABI V2 registration are implemented. ++ */ ++ if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION) && version > 1) { + if (rseq_slice_extension_enabled()) { + rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; + if (flags & RSEQ_FLAG_SLICE_EXT_DEFAULT_ON) +@@ -484,7 +500,15 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 + unsafe_put_user(RSEQ_CPU_ID_UNINITIALIZED, &rseq->cpu_id, efault); + unsafe_put_user(0U, &rseq->node_id, efault); + unsafe_put_user(0U, &rseq->mm_cid, efault); +- unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); ++ ++ /* ++ * All fields past mm_cid are only valid for non-legacy v2 ++ * registrations. ++ */ ++ if (version > 1) { ++ if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) ++ unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); ++ } + } + + /* +@@ -712,6 +736,8 @@ int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3) + return -ENOTSUPP; + if (!current->rseq.usrptr) + return -ENXIO; ++ if (!rseq_v2(current)) ++ return -ENOTSUPP; + + /* No change? */ + if (enable == !!current->rseq.slice.state.enabled) +diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c +index 6234456037252..226a6329f3e92 100644 +--- a/kernel/sched/membarrier.c ++++ b/kernel/sched/membarrier.c +@@ -199,7 +199,16 @@ static void ipi_rseq(void *info) + * is negligible. 
+ */ + smp_mb(); +- rseq_sched_switch_event(current); ++ /* ++ * Legacy mode requires that IDs are written and the critical section is ++ * evaluated. V2 optimized mode handles the critical section and IDs are ++ * only updated if they change as a consequence of preemption after ++ * return from this IPI. ++ */ ++ if (rseq_v2(current)) ++ rseq_sched_switch_event(current); ++ else ++ rseq_force_update(); + } + + static void ipi_sync_rq_state(void *info) +-- +2.53.0 + diff --git a/queue-7.0/series b/queue-7.0/series index cac06c2fed..845ebd49ac 100644 --- a/queue-7.0/series +++ b/queue-7.0/series @@ -1061,3 +1061,7 @@ kvm-x86-fix-xen-hypercall-tracepoint-argument-assignment.patch bluetooth-btmtk-accept-too-short-wmt-func_ctrl-events.patch hid-pass-the-buffer-size-to-hid_report_raw_event.patch hid-core-introduce-hid_safe_input_report.patch +rseq-revert-to-historical-performance-killing-behavi.patch +rseq-implement-read-only-abi-enforcement-for-optimiz.patch +rseq-reenable-performance-optimizations-conditionall.patch +hid-core-fix-size_t-specifier-in-hid_report_raw_even.patch