From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 23 Feb 2024 16:10:08 +0000 (+0100)
Subject: 5.15-stable patches
X-Git-Tag: v4.19.308~96
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2bc080b6c6a00c7b6c09dbb502f364172e863534;p=thirdparty%2Fkernel%2Fstable-queue.git

5.15-stable patches

added patches:
	sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch
	sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch
	x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch
---

diff --git a/queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch b/queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch
new file mode 100644
index 00000000000..fd67b1f8399
--- /dev/null
+++ b/queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch
@@ -0,0 +1,94 @@
+From 079be8fc630943d9fc70a97807feb73d169ee3fc Mon Sep 17 00:00:00 2001
+From: Cyril Hrubis <chrubis@suse.cz>
+Date: Mon, 2 Oct 2023 13:55:51 +0200
+Subject: sched/rt: Disallow writing invalid values to sched_rt_period_us
+
+From: Cyril Hrubis <chrubis@suse.cz>
+
+commit 079be8fc630943d9fc70a97807feb73d169ee3fc upstream.
+
+The validation of the value written to sched_rt_period_us was broken
+because:
+
+  - the sysctl_sched_rt_period is declared as unsigned int
+  - parsed by proc_do_intvec()
+  - the range is asserted after the value parsed by proc_do_intvec()
+
+Because of this, negative values written to the file were stored in an
+unsigned integer and were later interpreted as large positive
+integers, which passed the check:
+
+  if (sysctl_sched_rt_period <= 0)
+	return -EINVAL;
+
+This commit fixes the parsing by setting an explicit range for both
+period_us and runtime_us in the sched_rt_sysctls table and processes
+the values with proc_dointvec_minmax() instead.
+
+Alternatively if we wanted to use full range of unsigned int for the
+period value we would have to split the proc_handler and use
+proc_douintvec() for it however even the
+Documentation/scheduler/sched-rt-group.rst describes the range as 1 to
+INT_MAX.
+
+As far as I can tell the only problem this causes is that the sysctl
+file allows writing negative values which when read back may confuse
+userspace.
+
+There is also a LTP test being submitted for these sysctl files at:
+
+  http://patchwork.ozlabs.org/project/ltp/patch/20230901144433.2526-1-chrubis@suse.cz/
+
+Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/20231002115553.3007-2-chrubis@suse.cz
+[ pvorel: rebased for 5.15, 5.10 ]
+Reviewed-by: Petr Vorel <pvorel@suse.cz>
+Signed-off-by: Petr Vorel <pvorel@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/rt.c |    5 +----
+ kernel/sysctl.c   |    4 ++++
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -2806,9 +2806,6 @@ static int sched_rt_global_constraints(v
+ 
+ static int sched_rt_global_validate(void)
+ {
+-	if (sysctl_sched_rt_period <= 0)
+-		return -EINVAL;
+-
+ 	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
+ 		((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
+ 		 ((u64)sysctl_sched_rt_runtime *
+@@ -2839,7 +2836,7 @@ int sched_rt_handler(struct ctl_table *t
+ 	old_period = sysctl_sched_rt_period;
+ 	old_runtime = sysctl_sched_rt_runtime;
+ 
+-	ret = proc_dointvec(table, write, buffer, lenp, ppos);
++	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ 
+ 	if (!ret && write) {
+ 		ret = sched_rt_global_validate();
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1821,6 +1821,8 @@ static struct ctl_table kern_table[] = {
+ 		.maxlen		= sizeof(unsigned int),
+ 		.mode		= 0644,
+ 		.proc_handler	= sched_rt_handler,
++		.extra1		= SYSCTL_ONE,
++		.extra2		= SYSCTL_INT_MAX,
+ 	},
+ 	{
+ 		.procname	= "sched_rt_runtime_us",
+@@ -1828,6 +1830,8 @@ static struct ctl_table kern_table[] = {
+ 		.maxlen		= sizeof(int),
+ 		.mode		= 0644,
+ 		.proc_handler	= sched_rt_handler,
++		.extra1		= SYSCTL_NEG_ONE,
++		.extra2		= SYSCTL_INT_MAX,
+ 	},
+ 	{
+ 		.procname	= "sched_deadline_period_max_us",
diff --git a/queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch b/queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch
new file mode 100644
index 00000000000..8a1aaccc7bc
--- /dev/null
+++ b/queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch
@@ -0,0 +1,72 @@
+From c7fcb99877f9f542c918509b2801065adcaf46fa Mon Sep 17 00:00:00 2001
+From: Cyril Hrubis <chrubis@suse.cz>
+Date: Wed, 2 Aug 2023 17:19:05 +0200
+Subject: sched/rt: Fix sysctl_sched_rr_timeslice intial value
+
+From: Cyril Hrubis <chrubis@suse.cz>
+
+commit c7fcb99877f9f542c918509b2801065adcaf46fa upstream.
+
+There is a 10% rounding error in the initial value of the
+sysctl_sched_rr_timeslice with CONFIG_HZ_300=y.
+
+This was found with LTP test sched_rr_get_interval01:
+
+sched_rr_get_interval01.c:57: TPASS: sched_rr_get_interval() passed
+sched_rr_get_interval01.c:64: TPASS: Time quantum 0s 99999990ns
+sched_rr_get_interval01.c:72: TFAIL: /proc/sys/kernel/sched_rr_timeslice_ms != 100 got 90
+sched_rr_get_interval01.c:57: TPASS: sched_rr_get_interval() passed
+sched_rr_get_interval01.c:64: TPASS: Time quantum 0s 99999990ns
+sched_rr_get_interval01.c:72: TFAIL: /proc/sys/kernel/sched_rr_timeslice_ms != 100 got 90
+
+What this test does is to compare the return value from the
+sched_rr_get_interval() and the sched_rr_timeslice_ms sysctl file and
+fails if they do not match.
+
+The problem it found is the initial sysctl file value which was computed as:
+
+static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
+
+which works fine as long as MSEC_PER_SEC is multiple of HZ, however it
+introduces 10% rounding error for CONFIG_HZ_300:
+
+(MSEC_PER_SEC / HZ) * (100 * HZ / 1000)
+
+(1000 / 300) * (100 * 300 / 1000)
+
+3 * 30 = 90
+
+This can be easily fixed by reversing the order of the multiplication
+and division. After this fix we get:
+
+(MSEC_PER_SEC * (100 * HZ / 1000)) / HZ
+
+(1000 * (100 * 300 / 1000)) / 300
+
+(1000 * 30) / 300 = 100
+
+Fixes: 975e155ed873 ("sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds")
+Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Petr Vorel <pvorel@suse.cz>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Tested-by: Petr Vorel <pvorel@suse.cz>
+Link: https://lore.kernel.org/r/20230802151906.25258-2-chrubis@suse.cz
+[ pvorel: rebased for 5.15, 5.10 ]
+Signed-off-by: Petr Vorel <pvorel@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/rt.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -8,7 +8,7 @@
+ #include "pelt.h"
+ 
+ int sched_rr_timeslice = RR_TIMESLICE;
+-int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
++int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
+ /* More than 4 hours if BW_SHIFT equals 20. */
+ static const u64 max_rt_runtime = MAX_BW;
+ 
diff --git a/queue-5.15/series b/queue-5.15/series
index 37038ce3470..fc76bdbd88d 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -12,3 +12,6 @@ bpf-do-cleanup-in-bpf_bprintf_cleanup-only-when-needed.patch
 bpf-remove-trace_printk_lock.patch
 userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb.patch
 zonefs-improve-error-handling.patch
+x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch
+sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch
+sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch
diff --git a/queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch b/queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch
new file mode 100644
index 00000000000..c66a81abe7f
--- /dev/null
+++ b/queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch
@@ -0,0 +1,95 @@
+From d877550eaf2dc9090d782864c96939397a3c6835 Mon Sep 17 00:00:00 2001
+From: Andrei Vagin <avagin@google.com>
+Date: Mon, 29 Jan 2024 22:36:03 -0800
+Subject: x86/fpu: Stop relying on userspace for info to fault in xsave buffer
+
+From: Andrei Vagin <avagin@google.com>
+
+commit d877550eaf2dc9090d782864c96939397a3c6835 upstream.
+
+Before this change, the expected size of the user space buffer was
+taken from fx_sw->xstate_size. fx_sw->xstate_size can be changed
+from user-space, so it is possible to construct a sigreturn frame where:
+
+ * fx_sw->xstate_size is smaller than the size required by valid bits in
+   fx_sw->xfeatures.
+ * user-space unmaps parts of the sigframe fpu buffer so that not all of
+   the buffer required by xrstor is accessible.
+
+In this case, xrstor tries to restore and accesses the unmapped area
+which results in a fault. But fault_in_readable succeeds because buf +
+fx_sw->xstate_size is within the still mapped area, so it goes back and
+tries xrstor again. It will spin in this loop forever.
+
+Instead, fault in the maximum size which can be touched by XRSTOR (taken
+from fpstate->user_size).
+
+[ dhansen: tweak subject / changelog ]
+
+Fixes: fcb3635f5018 ("x86/fpu/signal: Handle #PF in the direct restore path")
+Reported-by: Konstantin Bogomolov <bogomolov@google.com>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Andrei Vagin <avagin@google.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc:stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com
+Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/signal.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kernel/fpu/signal.c
++++ b/arch/x86/kernel/fpu/signal.c
+@@ -246,12 +246,13 @@ static int __restore_fpregs_from_user(vo
+  * Attempt to restore the FPU registers directly from user memory.
+  * Pagefaults are handled and any errors returned are fatal.
+  */
+-static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
+-				    bool fx_only, unsigned int size)
++static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
+ {
+ 	struct fpu *fpu = &current->thread.fpu;
+ 	int ret;
+ 
++	/* Restore enabled features only. */
++	xrestore &= xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
+ retry:
+ 	fpregs_lock();
+ 	pagefault_disable();
+@@ -278,7 +279,7 @@ retry:
+ 		if (ret != -EFAULT)
+ 			return -EINVAL;
+ 
+-		if (!fault_in_readable(buf, size))
++		if (!fault_in_readable(buf, fpu_user_xstate_size))
+ 			goto retry;
+ 		return -EFAULT;
+ 	}
+@@ -303,7 +304,6 @@ retry:
+ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
+ 			     bool ia32_fxstate)
+ {
+-	int state_size = fpu_kernel_xstate_size;
+ 	struct task_struct *tsk = current;
+ 	struct fpu *fpu = &tsk->thread.fpu;
+ 	struct user_i387_ia32_struct env;
+@@ -319,7 +319,6 @@ static int __fpu_restore_sig(void __user
+ 			return ret;
+ 
+ 		fx_only = !fx_sw_user.magic1;
+-		state_size = fx_sw_user.xstate_size;
+ 		user_xfeatures = fx_sw_user.xfeatures;
+ 	} else {
+ 		user_xfeatures = XFEATURE_MASK_FPSSE;
+@@ -332,8 +331,7 @@ static int __fpu_restore_sig(void __user
+ 		 * faults. If it does, fall back to the slow path below, going
+ 		 * through the kernel buffer with the enabled pagefault handler.
+ 		 */
+-		return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
+-						state_size);
++		return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
+ 	}
+ 
+ 	/*