5.15-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 23 Feb 2024 16:10:08 +0000 (17:10 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 23 Feb 2024 16:10:08 +0000 (17:10 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 23 Feb 2024 16:10:08 +0000 (17:10 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 23 Feb 2024 16:10:08 +0000 (17:10 +0100)
diff --git a/queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch b/queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch

new file mode 100644 (file)

index 0000000..fd67b1f
--- /dev/null
+++ b/queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch
@@ -0,0 +1,94 @@
+From 079be8fc630943d9fc70a97807feb73d169ee3fc Mon Sep 17 00:00:00 2001
+From: Cyril Hrubis <chrubis@suse.cz>
+Date: Mon, 2 Oct 2023 13:55:51 +0200
+Subject: sched/rt: Disallow writing invalid values to sched_rt_period_us
+
+From: Cyril Hrubis <chrubis@suse.cz>
+
+commit 079be8fc630943d9fc70a97807feb73d169ee3fc upstream.
+
+The validation of the value written to sched_rt_period_us was broken
+because:
+
+  - the sysctl_sched_rt_period is declared as unsigned int
+  - parsed by proc_do_intvec()
+  - the range is asserted after the value parsed by proc_do_intvec()
+
+Because of this, negative values written to the file were stored in an
+unsigned integer and later interpreted as large positive integers,
+which passed the check:
+
+  if (sysctl_sched_rt_period <= 0)
+       return -EINVAL;
+
+This commit fixes the parsing by setting an explicit range for both
+period_us and runtime_us in the sched_rt_sysctls table and processing
+the values with proc_dointvec_minmax() instead.
+
+Alternatively, if we wanted to use the full range of unsigned int for
+the period value, we would have to split the proc_handler and use
+proc_douintvec() for it; however, even
+Documentation/scheduler/sched-rt-group.rst describes the range as 1 to
+INT_MAX.
+
+As far as I can tell the only problem this causes is that the sysctl
+file allows writing negative values which when read back may confuse
+userspace.
+
+There is also an LTP test being submitted for these sysctl files at:
+
+  http://patchwork.ozlabs.org/project/ltp/patch/20230901144433.2526-1-chrubis@suse.cz/
+
+Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/20231002115553.3007-2-chrubis@suse.cz
+[ pvorel: rebased for 5.15, 5.10 ]
+Reviewed-by: Petr Vorel <pvorel@suse.cz>
+Signed-off-by: Petr Vorel <pvorel@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/rt.c |    5 +----
+ kernel/sysctl.c   |    4 ++++
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -2806,9 +2806,6 @@ static int sched_rt_global_constraints(v
+ 
+ static int sched_rt_global_validate(void)
+ {
+-      if (sysctl_sched_rt_period <= 0)
+-              return -EINVAL;
+-
+       if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
+               ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
+                ((u64)sysctl_sched_rt_runtime *
+@@ -2839,7 +2836,7 @@ int sched_rt_handler(struct ctl_table *t
+       old_period = sysctl_sched_rt_period;
+       old_runtime = sysctl_sched_rt_runtime;
+ 
+-      ret = proc_dointvec(table, write, buffer, lenp, ppos);
++      ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ 
+       if (!ret && write) {
+               ret = sched_rt_global_validate();
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1821,6 +1821,8 @@ static struct ctl_table kern_table[] = {
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = sched_rt_handler,
++              .extra1         = SYSCTL_ONE,
++              .extra2         = SYSCTL_INT_MAX,
+       },
+       {
+               .procname       = "sched_rt_runtime_us",
+@@ -1828,6 +1830,8 @@ static struct ctl_table kern_table[] = {
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = sched_rt_handler,
++              .extra1         = SYSCTL_NEG_ONE,
++              .extra2         = SYSCTL_INT_MAX,
+       },
+       {
+               .procname       = "sched_deadline_period_max_us",
diff --git a/queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch b/queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch

new file mode 100644 (file)

index 0000000..8a1aacc
--- /dev/null
+++ b/queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch
@@ -0,0 +1,72 @@
+From c7fcb99877f9f542c918509b2801065adcaf46fa Mon Sep 17 00:00:00 2001
+From: Cyril Hrubis <chrubis@suse.cz>
+Date: Wed, 2 Aug 2023 17:19:05 +0200
+Subject: sched/rt: Fix sysctl_sched_rr_timeslice intial value
+
+From: Cyril Hrubis <chrubis@suse.cz>
+
+commit c7fcb99877f9f542c918509b2801065adcaf46fa upstream.
+
+There is a 10% rounding error in the initial value of
+sysctl_sched_rr_timeslice with CONFIG_HZ_300=y.
+
+This was found with LTP test sched_rr_get_interval01:
+
+sched_rr_get_interval01.c:57: TPASS: sched_rr_get_interval() passed
+sched_rr_get_interval01.c:64: TPASS: Time quantum 0s 99999990ns
+sched_rr_get_interval01.c:72: TFAIL: /proc/sys/kernel/sched_rr_timeslice_ms != 100 got 90
+sched_rr_get_interval01.c:57: TPASS: sched_rr_get_interval() passed
+sched_rr_get_interval01.c:64: TPASS: Time quantum 0s 99999990ns
+sched_rr_get_interval01.c:72: TFAIL: /proc/sys/kernel/sched_rr_timeslice_ms != 100 got 90
+
+This test compares the value returned by sched_rr_get_interval()
+with the contents of the sched_rr_timeslice_ms sysctl file and
+fails if they do not match.
+
+The problem it found is the initial sysctl file value, which was computed as:
+
+static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
+
+which works fine as long as MSEC_PER_SEC is a multiple of HZ; however, it
+introduces a 10% rounding error for CONFIG_HZ_300:
+
+(MSEC_PER_SEC / HZ) * (100 * HZ / 1000)
+
+(1000 / 300) * (100 * 300 / 1000)
+
+3 * 30 = 90
+
+This can be easily fixed by reversing the order of the multiplication
+and division. After this fix we get:
+
+(MSEC_PER_SEC * (100 * HZ / 1000)) / HZ
+
+(1000 * (100 * 300 / 1000)) / 300
+
+(1000 * 30) / 300 = 100
+
+Fixes: 975e155ed873 ("sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds")
+Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Petr Vorel <pvorel@suse.cz>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Tested-by: Petr Vorel <pvorel@suse.cz>
+Link: https://lore.kernel.org/r/20230802151906.25258-2-chrubis@suse.cz
+[ pvorel: rebased for 5.15, 5.10 ]
+Signed-off-by: Petr Vorel <pvorel@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/rt.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -8,7 +8,7 @@
+ #include "pelt.h"
+ 
+ int sched_rr_timeslice = RR_TIMESLICE;
+-int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
++int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
+ /* More than 4 hours if BW_SHIFT equals 20. */
+ static const u64 max_rt_runtime = MAX_BW;
+ 
diff --git a/queue-5.15/series b/queue-5.15/series

index 37038ce3470597fb059e3080918377b55f5f3181..fc76bdbd88d5cd663723f5cbcd3559ede98cafba 100644 (file)
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -12,3 +12,6 @@ bpf-do-cleanup-in-bpf_bprintf_cleanup-only-when-needed.patch
  bpf-remove-trace_printk_lock.patch
  userfaultfd-fix-mmap_changing-checking-in-mfill_atomic_hugetlb.patch
  zonefs-improve-error-handling.patch
+x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch
+sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch
+sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch
diff --git a/queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch b/queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch

new file mode 100644 (file)

index 0000000..c66a81a
--- /dev/null
+++ b/queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch
@@ -0,0 +1,95 @@
+From d877550eaf2dc9090d782864c96939397a3c6835 Mon Sep 17 00:00:00 2001
+From: Andrei Vagin <avagin@google.com>
+Date: Mon, 29 Jan 2024 22:36:03 -0800
+Subject: x86/fpu: Stop relying on userspace for info to fault in xsave buffer
+
+From: Andrei Vagin <avagin@google.com>
+
+commit d877550eaf2dc9090d782864c96939397a3c6835 upstream.
+
+Before this change, the expected size of the user space buffer was
+taken from fx_sw->xstate_size. fx_sw->xstate_size can be changed
+from user-space, so it is possible to construct a sigreturn frame where:
+
+ * fx_sw->xstate_size is smaller than the size required by valid bits in
+   fx_sw->xfeatures.
+ * user-space unmaps parts of the sigframe fpu buffer so that not all of
+   the buffer required by xrstor is accessible.
+
+In this case, xrstor tries to restore and accesses the unmapped area
+which results in a fault. But fault_in_readable succeeds because buf +
+fx_sw->xstate_size is within the still mapped area, so it goes back and
+tries xrstor again. It will spin in this loop forever.
+
+Instead, fault in the maximum size which can be touched by XRSTOR (taken
+from fpstate->user_size).
+
+[ dhansen: tweak subject / changelog ]
+
+Fixes: fcb3635f5018 ("x86/fpu/signal: Handle #PF in the direct restore path")
+Reported-by: Konstantin Bogomolov <bogomolov@google.com>
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Andrei Vagin <avagin@google.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com
+Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/signal.c |   12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kernel/fpu/signal.c
++++ b/arch/x86/kernel/fpu/signal.c
+@@ -246,12 +246,13 @@ static int __restore_fpregs_from_user(vo
+  * Attempt to restore the FPU registers directly from user memory.
+  * Pagefaults are handled and any errors returned are fatal.
+  */
+-static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
+-                                  bool fx_only, unsigned int size)
++static int restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
+ {
+       struct fpu *fpu = &current->thread.fpu;
+       int ret;
+ 
++      /* Restore enabled features only. */
++      xrestore &= xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
+ retry:
+       fpregs_lock();
+       pagefault_disable();
+@@ -278,7 +279,7 @@ retry:
+               if (ret != -EFAULT)
+                       return -EINVAL;
+ 
+-              if (!fault_in_readable(buf, size))
++              if (!fault_in_readable(buf, fpu_user_xstate_size))
+                       goto retry;
+               return -EFAULT;
+       }
+@@ -303,7 +304,6 @@ retry:
+ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
+                            bool ia32_fxstate)
+ {
+-      int state_size = fpu_kernel_xstate_size;
+       struct task_struct *tsk = current;
+       struct fpu *fpu = &tsk->thread.fpu;
+       struct user_i387_ia32_struct env;
+@@ -319,7 +319,6 @@ static int __fpu_restore_sig(void __user
+                       return ret;
+ 
+               fx_only = !fx_sw_user.magic1;
+-              state_size = fx_sw_user.xstate_size;
+               user_xfeatures = fx_sw_user.xfeatures;
+       } else {
+               user_xfeatures = XFEATURE_MASK_FPSSE;
+@@ -332,8 +331,7 @@ static int __fpu_restore_sig(void __user
+                * faults. If it does, fall back to the slow path below, going
+                * through the kernel buffer with the enabled pagefault handler.
+                */
+-              return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
+-                                              state_size);
++              return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
+       }
+ 
+       /*
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 23 Feb 2024 16:10:08 +0000 (17:10 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 23 Feb 2024 16:10:08 +0000 (17:10 +0100)
queue-5.15/sched-rt-disallow-writing-invalid-values-to-sched_rt_period_us.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/sched-rt-fix-sysctl_sched_rr_timeslice-intial-value.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/series		patch \| blob \| blame \| history
queue-5.15/x86-fpu-stop-relying-on-userspace-for-info-to-fault-in-xsave-buffer.patch	[new file with mode: 0644]	patch \| blob