From: Sasha Levin Date: Tue, 3 Oct 2023 11:57:09 +0000 (-0400) Subject: Fixes for 5.10 X-Git-Tag: v6.5.6~53 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=740b2eee442178b9885c3c9eb2747e785659e7f0;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/cgroup-fix-suspicious-rcu_dereference_check-usage-wa.patch b/queue-5.10/cgroup-fix-suspicious-rcu_dereference_check-usage-wa.patch new file mode 100644 index 00000000000..f69bb829a7e --- /dev/null +++ b/queue-5.10/cgroup-fix-suspicious-rcu_dereference_check-usage-wa.patch @@ -0,0 +1,46 @@ +From a7a1ff80a7a0f1b3823b3862fa05c0eb79cf6290 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Sep 2023 16:14:18 +0300 +Subject: cgroup: Fix suspicious rcu_dereference_check() usage warning + +From: Chengming Zhou + +commit f2aa197e4794bf4c2c0c9570684f86e6fa103e8b upstream. + +task_css_set_check() will use rcu_dereference_check() to check for +rcu_read_lock_held() on the read-side, which is not true after commit +dc6e0818bc9a ("sched/cpuacct: Optimize away RCU read lock"). This +commit drop explicit rcu_read_lock(), change to RCU-sched read-side +critical section. So fix the RCU warning by adding check for +rcu_read_lock_sched_held(). 
+ +Fixes: dc6e0818bc9a ("sched/cpuacct: Optimize away RCU read lock") +Reported-by: Linux Kernel Functional Testing +Reported-by: syzbot+16e3f2c77e7c5a0113f9@syzkaller.appspotmail.com +Signed-off-by: Chengming Zhou +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Tejun Heo +Tested-by: Zhouyi Zhou +Tested-by: Marek Szyprowski +Link: https://lore.kernel.org/r/20220305034103.57123-1-zhouchengming@bytedance.com +Signed-off-by: Ovidiu Panait +Signed-off-by: Sasha Levin +--- + include/linux/cgroup.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h +index 7653f54189502..c9c430712d471 100644 +--- a/include/linux/cgroup.h ++++ b/include/linux/cgroup.h +@@ -451,6 +451,7 @@ extern struct mutex cgroup_mutex; + extern spinlock_t css_set_lock; + #define task_css_set_check(task, __c) \ + rcu_dereference_check((task)->cgroups, \ ++ rcu_read_lock_sched_held() || \ + lockdep_is_held(&cgroup_mutex) || \ + lockdep_is_held(&css_set_lock) || \ + ((task)->flags & PF_EXITING) || (__c)) +-- +2.40.1 + diff --git a/queue-5.10/perf-build-define-yynomem-as-yynoabort-for-bison-3.8.patch b/queue-5.10/perf-build-define-yynomem-as-yynoabort-for-bison-3.8.patch new file mode 100644 index 00000000000..b52fd72d13a --- /dev/null +++ b/queue-5.10/perf-build-define-yynomem-as-yynoabort-for-bison-3.8.patch @@ -0,0 +1,42 @@ +From ac55ca8ae7a5c096a00d8189f30372b9ceb36733 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 17:26:54 -0300 +Subject: perf build: Define YYNOMEM as YYNOABORT for bison < 3.81 + +From: Arnaldo Carvalho de Melo + +[ Upstream commit 88cc47e24597971b05b6e94c28a2fc81d2a8d61a ] + +YYNOMEM was introduced in bison 3.81, so define it as YYABORT for older +versions, which should provide the previous perf behaviour. 
+ +Cc: Adrian Hunter +Cc: Ian Rogers +Cc: Jiri Olsa +Cc: Namhyung Kim +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/Build | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/tools/perf/util/Build b/tools/perf/util/Build +index 0cf27354aa451..0f9732d5452e6 100644 +--- a/tools/perf/util/Build ++++ b/tools/perf/util/Build +@@ -253,6 +253,12 @@ ifeq ($(BISON_GE_35),1) + else + bison_flags += -w + endif ++ ++BISON_LT_381 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 381) ++ifeq ($(BISON_LT_381),1) ++ bison_flags += -DYYNOMEM=YYABORT ++endif ++ + CFLAGS_parse-events-bison.o += $(bison_flags) + CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) + CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) +-- +2.40.1 + diff --git a/queue-5.10/sched-cpuacct-fix-charge-percpu-cpuusage.patch b/queue-5.10/sched-cpuacct-fix-charge-percpu-cpuusage.patch new file mode 100644 index 00000000000..33b513dd14e --- /dev/null +++ b/queue-5.10/sched-cpuacct-fix-charge-percpu-cpuusage.patch @@ -0,0 +1,50 @@ +From 42b47adc5d1c7f3bd9823dd12eaaffaee25cef90 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Sep 2023 16:14:16 +0300 +Subject: sched/cpuacct: Fix charge percpu cpuusage + +From: Chengming Zhou + +commit 248cc9993d1cc12b8e9ed716cc3fc09f6c3517dd upstream. + +The cpuacct_account_field() is always called by the current task +itself, so it's ok to use __this_cpu_add() to charge the tick time. + +But cpuacct_charge() may be called by update_curr() in load_balance() +on a random CPU, different from the CPU on which the task is running. +So __this_cpu_add() will charge that cputime to a random incorrect CPU.
+ +Fixes: 73e6aafd9ea8 ("sched/cpuacct: Simplify the cpuacct code") +Reported-by: Minye Zhu +Signed-off-by: Chengming Zhou +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Tejun Heo +Link: https://lore.kernel.org/r/20220220051426.5274-1-zhouchengming@bytedance.com +Signed-off-by: Ovidiu Panait +Signed-off-by: Sasha Levin +--- + kernel/sched/cpuacct.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c +index 8a260115a137b..3c59c541dd314 100644 +--- a/kernel/sched/cpuacct.c ++++ b/kernel/sched/cpuacct.c +@@ -328,12 +328,13 @@ static struct cftype files[] = { + */ + void cpuacct_charge(struct task_struct *tsk, u64 cputime) + { ++ unsigned int cpu = task_cpu(tsk); + struct cpuacct *ca; + + rcu_read_lock(); + + for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) +- __this_cpu_add(*ca->cpuusage, cputime); ++ *per_cpu_ptr(ca->cpuusage, cpu) += cputime; + + rcu_read_unlock(); + } +-- +2.40.1 + diff --git a/queue-5.10/sched-cpuacct-fix-user-system-in-shown-cpuacct.usage.patch b/queue-5.10/sched-cpuacct-fix-user-system-in-shown-cpuacct.usage.patch new file mode 100644 index 00000000000..d0895b7bd5e --- /dev/null +++ b/queue-5.10/sched-cpuacct-fix-user-system-in-shown-cpuacct.usage.patch @@ -0,0 +1,219 @@ +From 29856f2c0c303cdfae2279198ae93c147c1915fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Sep 2023 16:14:15 +0300 +Subject: sched/cpuacct: Fix user/system in shown cpuacct.usage* + +From: Andrey Ryabinin + +commit dd02d4234c9a2214a81c57a16484304a1a51872a upstream. + +cpuacct has 2 different ways of accounting and showing user +and system times. + +The first one uses cpuacct_account_field() to account times +and cpuacct.stat file to expose them. And this one seems to work ok. + +The second one uses the cpuacct_charge() function for accounting and +a set of cpuacct.usage* files to show times. Despite some attempts to +fix it in the past it still doesn't work.
Sometimes while running KVM +guest the cpuacct_charge() accounts most of the guest time as +system time. This doesn't match with user&system times shown in +cpuacct.stat or /proc/[pid]/stat. + +Demonstration: + # git clone https://github.com/aryabinin/kvmsample + # make + # mkdir /sys/fs/cgroup/cpuacct/test + # echo $$ > /sys/fs/cgroup/cpuacct/test/tasks + # ./kvmsample & + # for i in {1..5}; do cat /sys/fs/cgroup/cpuacct/test/cpuacct.usage_sys; sleep 1; done + 1976535645 + 2979839428 + 3979832704 + 4983603153 + 5983604157 + +Use cpustats accounted in cpuacct_account_field() as the source +of user/sys times for cpuacct.usage* files. Make cpuacct_charge() +to account only summary execution time. + +Fixes: d740037fac70 ("sched/cpuacct: Split usage accounting into user_usage and sys_usage") +Signed-off-by: Andrey Ryabinin +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Daniel Jordan +Acked-by: Tejun Heo +Cc: +Link: https://lore.kernel.org/r/20211115164607.23784-3-arbn@yandex-team.com +[OP: adjusted context for v5.10] +Signed-off-by: Ovidiu Panait +Signed-off-by: Sasha Levin +--- + kernel/sched/cpuacct.c | 79 +++++++++++++++++------------------------- + 1 file changed, 32 insertions(+), 47 deletions(-) + +diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c +index 941c28cf97384..8a260115a137b 100644 +--- a/kernel/sched/cpuacct.c ++++ b/kernel/sched/cpuacct.c +@@ -21,15 +21,11 @@ static const char * const cpuacct_stat_desc[] = { + [CPUACCT_STAT_SYSTEM] = "system", + }; + +-struct cpuacct_usage { +- u64 usages[CPUACCT_STAT_NSTATS]; +-}; +- + /* track CPU usage of a group of tasks and its child groups */ + struct cpuacct { + struct cgroup_subsys_state css; + /* cpuusage holds pointer to a u64-type object on every CPU */ +- struct cpuacct_usage __percpu *cpuusage; ++ u64 __percpu *cpuusage; + struct kernel_cpustat __percpu *cpustat; + }; + +@@ -49,7 +45,7 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca) + return css_ca(ca->css.parent); + } + +-static
DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage); ++static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); + static struct cpuacct root_cpuacct = { + .cpustat = &kernel_cpustat, + .cpuusage = &root_cpuacct_cpuusage, +@@ -68,7 +64,7 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) + if (!ca) + goto out; + +- ca->cpuusage = alloc_percpu(struct cpuacct_usage); ++ ca->cpuusage = alloc_percpu(u64); + if (!ca->cpuusage) + goto out_free_ca; + +@@ -99,7 +95,8 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css) + static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, + enum cpuacct_stat_index index) + { +- struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); ++ u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); ++ u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; + u64 data; + + /* +@@ -115,14 +112,17 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, + raw_spin_lock_irq(&cpu_rq(cpu)->lock); + #endif + +- if (index == CPUACCT_STAT_NSTATS) { +- int i = 0; +- +- data = 0; +- for (i = 0; i < CPUACCT_STAT_NSTATS; i++) +- data += cpuusage->usages[i]; +- } else { +- data = cpuusage->usages[index]; ++ switch (index) { ++ case CPUACCT_STAT_USER: ++ data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE]; ++ break; ++ case CPUACCT_STAT_SYSTEM: ++ data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] + ++ cpustat[CPUTIME_SOFTIRQ]; ++ break; ++ case CPUACCT_STAT_NSTATS: ++ data = *cpuusage; ++ break; + } + + #ifndef CONFIG_64BIT +@@ -132,10 +132,14 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, + return data; + } + +-static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) ++static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu) + { +- struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); +- int i; ++ u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); ++ u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; ++ ++ /* Don't allow to reset global kernel_cpustat */ ++ if (ca 
== &root_cpuacct) ++ return; + + #ifndef CONFIG_64BIT + /* +@@ -143,9 +147,10 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) + */ + raw_spin_lock_irq(&cpu_rq(cpu)->lock); + #endif +- +- for (i = 0; i < CPUACCT_STAT_NSTATS; i++) +- cpuusage->usages[i] = val; ++ *cpuusage = 0; ++ cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0; ++ cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0; ++ cpustat[CPUTIME_SOFTIRQ] = 0; + + #ifndef CONFIG_64BIT + raw_spin_unlock_irq(&cpu_rq(cpu)->lock); +@@ -196,7 +201,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, + return -EINVAL; + + for_each_possible_cpu(cpu) +- cpuacct_cpuusage_write(ca, cpu, 0); ++ cpuacct_cpuusage_write(ca, cpu); + + return 0; + } +@@ -243,25 +248,10 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V) + seq_puts(m, "\n"); + + for_each_possible_cpu(cpu) { +- struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); +- + seq_printf(m, "%d", cpu); +- +- for (index = 0; index < CPUACCT_STAT_NSTATS; index++) { +-#ifndef CONFIG_64BIT +- /* +- * Take rq->lock to make 64-bit read safe on 32-bit +- * platforms. +- */ +- raw_spin_lock_irq(&cpu_rq(cpu)->lock); +-#endif +- +- seq_printf(m, " %llu", cpuusage->usages[index]); +- +-#ifndef CONFIG_64BIT +- raw_spin_unlock_irq(&cpu_rq(cpu)->lock); +-#endif +- } ++ for (index = 0; index < CPUACCT_STAT_NSTATS; index++) ++ seq_printf(m, " %llu", ++ cpuacct_cpuusage_read(ca, cpu, index)); + seq_puts(m, "\n"); + } + return 0; +@@ -339,16 +329,11 @@ static struct cftype files[] = { + void cpuacct_charge(struct task_struct *tsk, u64 cputime) + { + struct cpuacct *ca; +- int index = CPUACCT_STAT_SYSTEM; +- struct pt_regs *regs = get_irq_regs() ? 
: task_pt_regs(tsk); +- +- if (regs && user_mode(regs)) +- index = CPUACCT_STAT_USER; + + rcu_read_lock(); + + for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) +- __this_cpu_add(ca->cpuusage->usages[index], cputime); ++ __this_cpu_add(*ca->cpuusage, cputime); + + rcu_read_unlock(); + } +-- +2.40.1 + diff --git a/queue-5.10/sched-cpuacct-optimize-away-rcu-read-lock.patch b/queue-5.10/sched-cpuacct-optimize-away-rcu-read-lock.patch new file mode 100644 index 00000000000..e77e1c17c18 --- /dev/null +++ b/queue-5.10/sched-cpuacct-optimize-away-rcu-read-lock.patch @@ -0,0 +1,66 @@ +From 88617226583184a6e4d79326741cec43648b2f7e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Sep 2023 16:14:17 +0300 +Subject: sched/cpuacct: Optimize away RCU read lock + +From: Chengming Zhou + +commit dc6e0818bc9a0336d9accf3ea35d146d72aa7a18 upstream. + +Since cpuacct_charge() is called from the scheduler update_curr(), +we must already have rq lock held, so the RCU read lock can +be optimized away. + +And do the same thing in its wrapper cgroup_account_cputime(), +but we can't use lockdep_assert_rq_held() there, which is defined +in kernel/sched/sched.h.
+ +Suggested-by: Peter Zijlstra (Intel) +Signed-off-by: Chengming Zhou +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20220220051426.5274-2-zhouchengming@bytedance.com +[OP: adjusted lockdep_assert_rq_held() -> lockdep_assert_held()] +Signed-off-by: Ovidiu Panait +Signed-off-by: Sasha Levin +--- + include/linux/cgroup.h | 2 -- + kernel/sched/cpuacct.c | 4 +--- + 2 files changed, 1 insertion(+), 5 deletions(-) + +diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h +index 959b370733f09..7653f54189502 100644 +--- a/include/linux/cgroup.h ++++ b/include/linux/cgroup.h +@@ -779,11 +779,9 @@ static inline void cgroup_account_cputime(struct task_struct *task, + + cpuacct_charge(task, delta_exec); + +- rcu_read_lock(); + cgrp = task_dfl_cgroup(task); + if (cgroup_parent(cgrp)) + __cgroup_account_cputime(cgrp, delta_exec); +- rcu_read_unlock(); + } + + static inline void cgroup_account_cputime_field(struct task_struct *task, +diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c +index 3c59c541dd314..8ee298321d78b 100644 +--- a/kernel/sched/cpuacct.c ++++ b/kernel/sched/cpuacct.c +@@ -331,12 +331,10 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime) + unsigned int cpu = task_cpu(tsk); + struct cpuacct *ca; + +- rcu_read_lock(); ++ lockdep_assert_held(&cpu_rq(cpu)->lock); + + for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) + *per_cpu_ptr(ca->cpuusage, cpu) += cputime; +- +- rcu_read_unlock(); + } + + /* +-- +2.40.1 + diff --git a/queue-5.10/series b/queue-5.10/series index 5c0683eef36..205a25651ee 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -128,3 +128,8 @@ media-vb2-frame_vector.c-replace-warn_once-with-a-co.patch powerpc-watchpoints-disable-preemption-in-thread_cha.patch ncsi-propagate-carrier-gain-loss-events-to-the-ncsi-.patch fbdev-sh7760fb-depend-on-fb-y.patch +perf-build-define-yynomem-as-yynoabort-for-bison-3.8.patch +sched-cpuacct-fix-user-system-in-shown-cpuacct.usage.patch 
+sched-cpuacct-fix-charge-percpu-cpuusage.patch +sched-cpuacct-optimize-away-rcu-read-lock.patch +cgroup-fix-suspicious-rcu_dereference_check-usage-wa.patch