From: Greg Kroah-Hartman Date: Mon, 13 May 2019 07:06:16 +0000 (+0200) Subject: 4.4-stable patches X-Git-Tag: v5.1.2~43 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c229e00889519609fade36946c60ce48c995d8bd;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: mm-vmstat-make-quiet_vmstat-lighter.patch --- diff --git a/queue-4.4/mm-vmstat-make-quiet_vmstat-lighter.patch b/queue-4.4/mm-vmstat-make-quiet_vmstat-lighter.patch new file mode 100644 index 00000000000..7004df8bedb --- /dev/null +++ b/queue-4.4/mm-vmstat-make-quiet_vmstat-lighter.patch @@ -0,0 +1,176 @@ +From f01f17d3705bb6081c9e5728078f64067982be36 Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Fri, 5 Feb 2016 15:36:24 -0800 +Subject: mm, vmstat: make quiet_vmstat lighter + +From: Michal Hocko + +commit f01f17d3705bb6081c9e5728078f64067982be36 upstream. + +Mike has reported a considerable overhead of refresh_cpu_vm_stats from +the idle entry during pipe test: + + 12.89% [kernel] [k] refresh_cpu_vm_stats.isra.12 + 4.75% [kernel] [k] __schedule + 4.70% [kernel] [k] mutex_unlock + 3.14% [kernel] [k] __switch_to + +This is caused by commit 0eb77e988032 ("vmstat: make vmstat_updater +deferrable again and shut down on idle") which has placed quiet_vmstat +into cpu_idle_loop. The main reason here seems to be that the idle +entry has to get over all zones and perform atomic operations for each +vmstat entry even though there might be no per cpu diffs. This is a +pointless overhead for _each_ idle entry. + +Make sure that quiet_vmstat is as light as possible. + +First of all it doesn't make any sense to do any local sync if the +current cpu is already set in oncpu_stat_off because vmstat_update puts +itself there only if there is nothing to do. + +Then we can check need_update which should be a cheap way to check for +potential per-cpu diffs and only then do refresh_cpu_vm_stats. + +The original patch also did cancel_delayed_work which we are not doing +here. There are two reasons for that. Firstly cancel_delayed_work from +idle context will blow up on RT kernels (reported by Mike): + + CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.5.0-rt3 #7 + Hardware name: MEDION MS-7848/MS-7848, BIOS M7848W08.20C 09/23/2013 + Call Trace: + dump_stack+0x49/0x67 + ___might_sleep+0xf5/0x180 + rt_spin_lock+0x20/0x50 + try_to_grab_pending+0x69/0x240 + cancel_delayed_work+0x26/0xe0 + quiet_vmstat+0x75/0xa0 + cpu_idle_loop+0x38/0x3e0 + cpu_startup_entry+0x13/0x20 + start_secondary+0x114/0x140 + +And secondly, even on !RT kernels it might add some non trivial overhead +which is not necessary. Even if the vmstat worker wakes up and preempts +idle then it will be most likely a single shot noop because the stats +were already synced and so it would end up on the oncpu_stat_off anyway. +We just need to teach both vmstat_shepherd and vmstat_update to stop +scheduling the worker if there is nothing to do. + +[mgalbraith@suse.de: cancel pending work of the cpu_stat_off CPU] +Signed-off-by: Michal Hocko +Reported-by: Mike Galbraith +Acked-by: Christoph Lameter +Signed-off-by: Mike Galbraith +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +Signed-off-by: Daniel Wagner + +--- + mm/vmstat.c | 68 ++++++++++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 46 insertions(+), 22 deletions(-) + +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1395,10 +1395,15 @@ static void vmstat_update(struct work_st + * Counters were updated so we expect more updates + * to occur in the future. Keep on running the + * update worker thread. ++ * If we were marked on cpu_stat_off clear the flag ++ * so that vmstat_shepherd doesn't schedule us again. + */ +- queue_delayed_work_on(smp_processor_id(), vmstat_wq, +- this_cpu_ptr(&vmstat_work), +- round_jiffies_relative(sysctl_stat_interval)); ++ if (!cpumask_test_and_clear_cpu(smp_processor_id(), ++ cpu_stat_off)) { ++ queue_delayed_work_on(smp_processor_id(), vmstat_wq, ++ this_cpu_ptr(&vmstat_work), ++ round_jiffies_relative(sysctl_stat_interval)); ++ } + } else { + /* + * We did not update any counters so the app may be in +@@ -1426,18 +1431,6 @@ static void vmstat_update(struct work_st + * until the diffs stay at zero. The function is used by NOHZ and can only be + * invoked when tick processing is not active. + */ +-void quiet_vmstat(void) +-{ +- if (system_state != SYSTEM_RUNNING) +- return; +- +- do { +- if (!cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off)) +- cancel_delayed_work(this_cpu_ptr(&vmstat_work)); +- +- } while (refresh_cpu_vm_stats(false)); +-} +- + /* + * Check if the diffs for a certain cpu indicate that + * an update is needed. +@@ -1461,6 +1454,30 @@ static bool need_update(int cpu) + return false; + } + ++void quiet_vmstat(void) ++{ ++ if (system_state != SYSTEM_RUNNING) ++ return; ++ ++ /* ++ * If we are already in hands of the shepherd then there ++ * is nothing for us to do here. ++ */ ++ if (cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off)) ++ return; ++ ++ if (!need_update(smp_processor_id())) ++ return; ++ ++ /* ++ * Just refresh counters and do not care about the pending delayed ++ * vmstat_update. It doesn't fire that often to matter and canceling ++ * it would be too expensive from this path. ++ * vmstat_shepherd will take care about that for us. ++ */ ++ refresh_cpu_vm_stats(false); ++} ++ + + /* + * Shepherd worker thread that checks the +@@ -1478,18 +1495,25 @@ static void vmstat_shepherd(struct work_ + + get_online_cpus(); + /* Check processors whose vmstat worker threads have been disabled */ +- for_each_cpu(cpu, cpu_stat_off) +- if (need_update(cpu) && +- cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) +- +- queue_delayed_work_on(cpu, vmstat_wq, +- &per_cpu(vmstat_work, cpu), 0); ++ for_each_cpu(cpu, cpu_stat_off) { ++ struct delayed_work *dw = &per_cpu(vmstat_work, cpu); + ++ if (need_update(cpu)) { ++ if (cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) ++ queue_delayed_work_on(cpu, vmstat_wq, dw, 0); ++ } else { ++ /* ++ * Cancel the work if quiet_vmstat has put this ++ * cpu on cpu_stat_off because the work item might ++ * be still scheduled ++ */ ++ cancel_delayed_work(dw); ++ } ++ } + put_online_cpus(); + + schedule_delayed_work(&shepherd, + round_jiffies_relative(sysctl_stat_interval)); +- + } + + static void __init start_shepherd_timer(void) diff --git a/queue-4.4/series b/queue-4.4/series index 78f13d9e2ae..8ec3277f25f 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -157,3 +157,4 @@ ipv6-fix-a-potential-deadlock-in-do_ipv6_setsockopt.patch asoc-intel-avoid-oops-if-dma-setup-fails.patch timer-debug-change-proc-timer_stats-from-0644-to-0600.patch netfilter-compat-initialize-all-fields-in-xt_init.patch +mm-vmstat-make-quiet_vmstat-lighter.patch