From: Greg Kroah-Hartman Date: Fri, 23 Feb 2024 16:10:08 +0000 (+0100) Subject: 5.15-stable patches X-Git-Tag: v4.19.308~96 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2bc080b6c6a00c7b6c09dbb502f364172e863534;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch --- diff --git a/queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch b/queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch new file mode 100644 index 00000000000..fd67b1f8399 --- /dev/null +++ b/queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch @@ -0,0 +1,94 @@ +From 079be8fc630943d9fc70a97807feb73d169ee3fc Mon Sep 17 00:00:00 2001 +From: Cyril Hrubis +Date: Mon, 2 Oct 2023 13:55:51 +0200 +Subject: sched/rt: Disallow writing invalid values to sched_rt_period_us + +From: Cyril Hrubis + +commit 079be8fc630943d9fc70a97807feb73d169ee3fc upstream. + +The validation of the value written to sched_rt_period_us was broken +because: + + - the sysctl_sched_rt_period is declared as unsigned int + - parsed by proc_dointvec() + - the range is asserted after the value parsed by proc_dointvec() + +Because of this negative values written to the file were written into an +unsigned integer that were later on interpreted as large positive +integers which passed the check: + + if (sysctl_sched_rt_period <= 0) + return -EINVAL; + +This commit fixes the parsing by setting explicit range for both +period_us and runtime_us into the sched_rt_sysctls table and processes +the values with proc_dointvec_minmax() instead. 
+ +Alternatively, if we wanted to use the full range of unsigned int for the +period value we would have to split the proc_handler and use +proc_douintvec() for it; however, even +Documentation/scheduler/sched-rt-group.rst describes the range as 1 to +INT_MAX. + +As far as I can tell the only problem this causes is that the sysctl +file allows writing negative values which when read back may confuse +userspace. + +There is also an LTP test being submitted for these sysctl files at: + + http://patchwork.ozlabs.org/project/ltp/patch/20230901144433.2526-1-chrubis@suse.cz/ + +Signed-off-by: Cyril Hrubis +Signed-off-by: Ingo Molnar +Link: https://lore.kernel.org/r/20231002115553.3007-2-chrubis@suse.cz +[ pvorel: rebased for 5.15, 5.10 ] +Reviewed-by: Petr Vorel +Signed-off-by: Petr Vorel +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/rt.c | 5 +---- + kernel/sysctl.c | 4 ++++ + 2 files changed, 5 insertions(+), 4 deletions(-) + +--- a/kernel/sched/rt.c ++++ b/kernel/sched/rt.c +@@ -2806,9 +2806,6 @@ static int sched_rt_global_constraints(v + + static int sched_rt_global_validate(void) + { +- if (sysctl_sched_rt_period <= 0) +- return -EINVAL; +- + if ((sysctl_sched_rt_runtime != RUNTIME_INF) && + ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) || + ((u64)sysctl_sched_rt_runtime * +@@ -2839,7 +2836,7 @@ int sched_rt_handler(struct ctl_table *t + old_period = sysctl_sched_rt_period; + old_runtime = sysctl_sched_rt_runtime; + +- ret = proc_dointvec(table, write, buffer, lenp, ppos); ++ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (!ret && write) { + ret = sched_rt_global_validate(); +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -1821,6 +1821,8 @@ static struct ctl_table kern_table[] = { + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_rt_handler, ++ .extra1 = SYSCTL_ONE, ++ .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "sched_rt_runtime_us", +@@ -1828,6 +1830,8 @@ static struct ctl_table kern_table[] = { + 
.maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sched_rt_handler, ++ .extra1 = SYSCTL_NEG_ONE, ++ .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "sched_deadline_period_max_us", diff --git a/queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch b/queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch new file mode 100644 index 00000000000..8a1aaccc7bc --- /dev/null +++ b/queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch @@ -0,0 +1,72 @@ +From c7fcb99877f9f542c918509b2801065adcaf46fa Mon Sep 17 00:00:00 2001 +From: Cyril Hrubis +Date: Wed, 2 Aug 2023 17:19:05 +0200 +Subject: sched/rt: Fix sysctl_sched_rr_timeslice intial value + +From: Cyril Hrubis + +commit c7fcb99877f9f542c918509b2801065adcaf46fa upstream. + +There is a 10% rounding error in the intial value of the +sysctl_sched_rr_timeslice with CONFIG_HZ_300=y. + +This was found with LTP test sched_rr_get_interval01: + +sched_rr_get_interval01.c:57: TPASS: sched_rr_get_interval() passed +sched_rr_get_interval01.c:64: TPASS: Time quantum 0s 99999990ns +sched_rr_get_interval01.c:72: TFAIL: /proc/sys/kernel/sched_rr_timeslice_ms != 100 got 90 +sched_rr_get_interval01.c:57: TPASS: sched_rr_get_interval() passed +sched_rr_get_interval01.c:64: TPASS: Time quantum 0s 99999990ns +sched_rr_get_interval01.c:72: TFAIL: /proc/sys/kernel/sched_rr_timeslice_ms != 100 got 90 + +What this test does is to compare the return value from the +sched_rr_get_interval() and the sched_rr_timeslice_ms sysctl file and +fails if they do not match. 
+ +The problem it found is the initial sysctl file value which was computed as: + +static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; + +which works fine as long as MSEC_PER_SEC is a multiple of HZ, however it +introduces a 10% rounding error for CONFIG_HZ_300: + +(MSEC_PER_SEC / HZ) * (100 * HZ / 1000) + +(1000 / 300) * (100 * 300 / 1000) + +3 * 30 = 90 + +This can be easily fixed by reversing the order of the multiplication +and division. After this fix we get: + +(MSEC_PER_SEC * (100 * HZ / 1000)) / HZ + +(1000 * (100 * 300 / 1000)) / 300 + +(1000 * 30) / 300 = 100 + +Fixes: 975e155ed873 ("sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds") +Signed-off-by: Cyril Hrubis +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Petr Vorel +Acked-by: Mel Gorman +Tested-by: Petr Vorel +Link: https://lore.kernel.org/r/20230802151906.25258-2-chrubis@suse.cz +[ pvorel: rebased for 5.15, 5.10 ] +Signed-off-by: Petr Vorel +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/rt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/sched/rt.c ++++ b/kernel/sched/rt.c +@@ -8,7 +8,7 @@ + #include "pelt.h" + + int sched_rr_timeslice = RR_TIMESLICE; +-int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; ++int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ; + /* More than 4 hours if BW_SHIFT equals 20. 
*/ + static const u64 max_rt_runtime = MAX_BW; + diff --git a/queue-5.15/series b/queue-5.15/series index 37038ce3470..fc76bdbd88d 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -12,3 +12,6 @@ bpf-do-cleanup-in-bpf_bprintf_cleanup-only-when-needed.patch bpf-remove-trace_printk_lock.patch userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb.patch zonefs-improve-error-handling.patch +x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch +sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch +sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch diff --git a/queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch b/queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch new file mode 100644 index 00000000000..c66a81abe7f --- /dev/null +++ b/queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch @@ -0,0 +1,95 @@ +From d877550eaf2dc9090d782864c96939397a3c6835 Mon Sep 17 00:00:00 2001 +From: Andrei Vagin +Date: Mon, 29 Jan 2024 22:36:03 -0800 +Subject: x86/fpu: Stop relying on userspace for info to fault in xsave buffer + +From: Andrei Vagin + +commit d877550eaf2dc9090d782864c96939397a3c6835 upstream. + +Before this change, the expected size of the user space buffer was +taken from fx_sw->xstate_size. fx_sw->xstate_size can be changed +from user-space, so it is possible construct a sigreturn frame where: + + * fx_sw->xstate_size is smaller than the size required by valid bits in + fx_sw->xfeatures. + * user-space unmaps parts of the sigrame fpu buffer so that not all of + the buffer required by xrstor is accessible. + +In this case, xrstor tries to restore and accesses the unmapped area +which results in a fault. But fault_in_readable succeeds because buf + +fx_sw->xstate_size is within the still mapped area, so it goes back and +tries xrstor again. It will spin in this loop forever. 
+ +Instead, fault in the maximum size which can be touched by XRSTOR (taken +from fpstate->user_size). + +[ dhansen: tweak subject / changelog ] + +Fixes: fcb3635f5018 ("x86/fpu/signal: Handle #PF in the direct restore path") +Reported-by: Konstantin Bogomolov +Suggested-by: Thomas Gleixner +Signed-off-by: Andrei Vagin +Signed-off-by: Dave Hansen +Cc:stable@vger.kernel.org +Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com +Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com +Signed-off-by: Thomas Gleixner +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/signal.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -246,12 +246,13 @@ static int __restore_fpregs_from_user(vo + * Attempt to restore the FPU registers directly from user memory. + * Pagefaults are handled and any errors returned are fatal. + */ +-static int restore_fpregs_from_user(void __user *buf, u64 xrestore, +- bool fx_only, unsigned int size) ++static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) + { + struct fpu *fpu = &current->thread.fpu; + int ret; + ++ /* Restore enabled features only. 
*/ ++ xrestore &= xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; + retry: + fpregs_lock(); + pagefault_disable(); +@@ -278,7 +279,7 @@ retry: + if (ret != -EFAULT) + return -EINVAL; + +- if (!fault_in_readable(buf, size)) ++ if (!fault_in_readable(buf, fpu_user_xstate_size)) + goto retry; + return -EFAULT; + } +@@ -303,7 +304,6 @@ retry: + static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, + bool ia32_fxstate) + { +- int state_size = fpu_kernel_xstate_size; + struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; + struct user_i387_ia32_struct env; +@@ -319,7 +319,6 @@ static int __fpu_restore_sig(void __user + return ret; + + fx_only = !fx_sw_user.magic1; +- state_size = fx_sw_user.xstate_size; + user_xfeatures = fx_sw_user.xfeatures; + } else { + user_xfeatures = XFEATURE_MASK_FPSSE; +@@ -332,8 +331,7 @@ static int __fpu_restore_sig(void __user + * faults. If it does, fall back to the slow path below, going + * through the kernel buffer with the enabled pagefault handler. + */ +- return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, +- state_size); ++ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); + } + + /*