/*
* Simple CPU accounting cgroup controller
*/
+#include <linux/sched/clock.h>
#include <linux/sched/cputime.h>
#include <linux/tsacct_kern.h>
#include "sched.h"
#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_NO_HZ_COMMON
-void kcpustat_dyntick_start(void)
+/*
+ * Close the idle period currently being timed on @kc, if there is one.
+ *
+ * Does nothing when kc->idle_elapse is clear. Otherwise the time elapsed
+ * since kc->idle_entrytime is added to CPUTIME_IOWAIT when
+ * nr_iowait_cpu() is non-zero for the executing CPU, or to CPUTIME_IDLE
+ * when it is zero, and the period is marked closed.
+ *
+ * @now must use the same clock and unit as kc->idle_entrytime; the callers
+ * visible in this patch pass ktime_get() values.
+ * NOTE(review): the iowait test uses smp_processor_id(), so @kc is
+ * presumably always the executing CPU's kernel_cpustat - confirm.
+ */
+static void kcpustat_idle_stop(struct kernel_cpustat *kc, u64 now)
{
- if (!vtime_generic_enabled_this_cpu()) {
- vtime_dyntick_start();
- __this_cpu_write(kernel_cpustat.idle_dyntick, 1);
- }
+ u64 *cpustat = kc->cpustat;
+ u64 delta;
+
+ /* No idle period is open: nothing to account. */
+ if (!kc->idle_elapse)
+ return;
+
+ delta = now - kc->idle_entrytime;
+
+ /*
+ * Counter, entry time and flag change inside one seqcount write
+ * section; get_cpu_sleep_time_us() reads them under the same seqcount.
+ */
+ write_seqcount_begin(&kc->idle_sleeptime_seq);
+ if (nr_iowait_cpu(smp_processor_id()) > 0)
+ cpustat[CPUTIME_IOWAIT] += delta;
+ else
+ cpustat[CPUTIME_IDLE] += delta;
+
+ kc->idle_entrytime = now;
+ kc->idle_elapse = false;
+ write_seqcount_end(&kc->idle_sleeptime_seq);
}
-void kcpustat_dyntick_stop(void)
+/*
+ * Open a new idle period on @kc: record @now as kc->idle_entrytime and set
+ * kc->idle_elapse. Both stores happen inside one write section of
+ * kc->idle_sleeptime_seq, the seqcount that get_cpu_sleep_time_us() reads
+ * under.
+ */
+static void kcpustat_idle_start(struct kernel_cpustat *kc, u64 now)
{
+ write_seqcount_begin(&kc->idle_sleeptime_seq);
+ kc->idle_entrytime = now;
+ kc->idle_elapse = true;
+ write_seqcount_end(&kc->idle_sleeptime_seq);
+}
+
+/*
+ * Counterpart of kcpustat_dyntick_start() for the current CPU.
+ *
+ * Only acts when vtime_generic_enabled_this_cpu() is false: closes any idle
+ * period still open at @now, clears kc->idle_dyntick, then calls
+ * vtime_dyntick_stop() and steal_account_process_time(ULONG_MAX).
+ */
+void kcpustat_dyntick_stop(u64 now)
+{
+ struct kernel_cpustat *kc = kcpustat_this_cpu;
+
if (!vtime_generic_enabled_this_cpu()) {
- __this_cpu_write(kernel_cpustat.idle_dyntick, 0);
+ /* idle_dyntick is set by kcpustat_dyntick_start(); flag a stop without it. */
+ WARN_ON_ONCE(!kc->idle_dyntick);
+ /* No-op if kcpustat_irq_enter() already closed the period. */
+ kcpustat_idle_stop(kc, now);
+ kc->idle_dyntick = false;
vtime_dyntick_stop();
steal_account_process_time(ULONG_MAX);
}
}
+
+/*
+ * Counterpart of kcpustat_dyntick_stop() for the current CPU.
+ *
+ * Only acts when vtime_generic_enabled_this_cpu() is false: calls
+ * vtime_dyntick_start(), sets kc->idle_dyntick and opens an idle period
+ * starting at @now.
+ */
+void kcpustat_dyntick_start(u64 now)
+{
+ struct kernel_cpustat *kc = kcpustat_this_cpu;
+
+ if (!vtime_generic_enabled_this_cpu()) {
+ vtime_dyntick_start();
+ kc->idle_dyntick = true;
+ kcpustat_idle_start(kc, now);
+ }
+}
+
+/*
+ * Close the current CPU's open idle period at @now, unless generic vtime
+ * is enabled on this CPU. kcpustat_idle_stop() itself does nothing when no
+ * period is open.
+ */
+void kcpustat_irq_enter(u64 now)
+{
+ struct kernel_cpustat *kc = kcpustat_this_cpu;
+
+ if (!vtime_generic_enabled_this_cpu())
+ kcpustat_idle_stop(kc, now);
+}
+
+/*
+ * Open a new idle period on the current CPU starting at @now, unless
+ * generic vtime is enabled on this CPU.
+ */
+void kcpustat_irq_exit(u64 now)
+{
+ struct kernel_cpustat *kc = kcpustat_this_cpu;
+
+ if (!vtime_generic_enabled_this_cpu())
+ kcpustat_idle_start(kc, now);
+}
+
+/*
+ * Read the cpustat counter @idx of @cpu and return it divided by
+ * NSEC_PER_USEC.
+ *
+ * @compute_delta: when true and an idle period is open on @cpu, the time
+ * elapsed since kc->idle_entrytime is added to the value read.
+ * @last_update_time: if not NULL, receives ktime_to_us() of the ktime_get()
+ * sample taken at the start of the call.
+ *
+ * When generic vtime is enabled for @cpu the value comes from
+ * kcpustat_field() and no delta is added.
+ */
+static u64 get_cpu_sleep_time_us(int cpu, enum cpu_usage_stat idx,
+ bool compute_delta, u64 *last_update_time)
+{
+ struct kernel_cpustat *kc = &kcpustat_cpu(cpu);
+ u64 *cpustat = kc->cpustat;
+ unsigned int seq;
+ ktime_t now;
+ u64 idle;
+
+ now = ktime_get();
+ if (last_update_time)
+ *last_update_time = ktime_to_us(now);
+
+ if (vtime_generic_enabled_cpu(cpu)) {
+ idle = kcpustat_field(idx, cpu);
+ goto to_us;
+ }
+
+ /* Retry until counter, flag and entry time are read without a concurrent write. */
+ do {
+ seq = read_seqcount_begin(&kc->idle_sleeptime_seq);
+
+ idle = cpustat[idx];
+ /* Skip the delta when the entry time is not earlier than our sample. */
+ if (kc->idle_elapse && compute_delta && now > kc->idle_entrytime)
+ idle += (now - kc->idle_entrytime);
+ } while (read_seqcount_retry(&kc->idle_sleeptime_seq, seq));
+
+to_us:
+ do_div(idle, NSEC_PER_USEC);
+
+ return idle;
+}
+
+/**
+ * get_cpu_idle_time_us - get the total idle time of a CPU
+ * @cpu: CPU number to query
+ * @last_update_time: if not NULL, receives the time of this query
+ * (ktime_get() converted to microseconds).
+ *
+ * Return the cumulative idle time (since boot) for a given
+ * CPU, in microseconds. Note that this is partially broken due to
+ * the counter of iowait tasks that can be remotely updated without
+ * any synchronization. Therefore it is possible to observe backward
+ * values within two consecutive reads.
+ *
+ * This time is measured via accounting rather than sampling,
+ * and is as accurate as ktime_get() is.
+ *
+ * Return: total idle time of the @cpu in microseconds. When generic vtime
+ * is enabled for @cpu the value is taken from kcpustat_field(); no -1
+ * sentinel is returned.
+ */
+u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
+{
+ /* A still-open idle period is only added when @cpu has no iowait tasks. */
+ return get_cpu_sleep_time_us(cpu, CPUTIME_IDLE,
+ !nr_iowait_cpu(cpu), last_update_time);
+}
+EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
+
+/**
+ * get_cpu_iowait_time_us - get the total iowait time of a CPU
+ * @cpu: CPU number to query
+ * @last_update_time: if not NULL, receives the time of this query
+ * (ktime_get() converted to microseconds).
+ *
+ * Return the cumulative iowait time (since boot) for a given
+ * CPU, in microseconds. Note this is partially broken due to
+ * the counter of iowait tasks that can be remotely updated without
+ * any synchronization. Therefore it is possible to observe backward
+ * values within two consecutive reads.
+ *
+ * This time is measured via accounting rather than sampling,
+ * and is as accurate as ktime_get() is.
+ *
+ * Return: total iowait time of @cpu in microseconds. When generic vtime
+ * is enabled for @cpu the value is taken from kcpustat_field(); no -1
+ * sentinel is returned.
+ */
+u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
+{
+ /* A still-open idle period is only added when @cpu has iowait tasks. */
+ return get_cpu_sleep_time_us(cpu, CPUTIME_IOWAIT,
+ nr_iowait_cpu(cpu), last_update_time);
+}
+EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
+
#endif /* CONFIG_NO_HZ_COMMON */
/*
touch_softlockup_watchdog_sched();
}
-static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
-{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
- ktime_t delta;
-
- if (vtime_generic_enabled_this_cpu())
- return;
-
- if (WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)))
- return;
-
- delta = ktime_sub(now, ts->idle_entrytime);
-
- write_seqcount_begin(&ts->idle_sleeptime_seq);
- if (nr_iowait_cpu(smp_processor_id()) > 0)
- cpustat[CPUTIME_IOWAIT] = ktime_add(cpustat[CPUTIME_IOWAIT], delta);
- else
- cpustat[CPUTIME_IDLE] = ktime_add(cpustat[CPUTIME_IDLE], delta);
-
- ts->idle_entrytime = now;
- tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE);
- write_seqcount_end(&ts->idle_sleeptime_seq);
-
- sched_clock_idle_wakeup_event();
-}
-
-static void tick_nohz_start_idle(struct tick_sched *ts)
-{
- if (vtime_generic_enabled_this_cpu())
- return;
-
- write_seqcount_begin(&ts->idle_sleeptime_seq);
- ts->idle_entrytime = ktime_get();
- tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE);
- write_seqcount_end(&ts->idle_sleeptime_seq);
- sched_clock_idle_sleep_event();
-}
-
-static u64 get_cpu_sleep_time_us(int cpu, enum cpu_usage_stat idx,
- bool compute_delta, u64 *last_update_time)
-{
- struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
- u64 *cpustat = kcpustat_cpu(cpu).cpustat;
- ktime_t now, idle;
- unsigned int seq;
-
- now = ktime_get();
- if (last_update_time)
- *last_update_time = ktime_to_us(now);
-
- if (vtime_generic_enabled_cpu(cpu)) {
- idle = kcpustat_field(idx, cpu);
- return ktime_to_us(idle);
- }
-
- do {
- ktime_t delta = 0;
-
- seq = read_seqcount_begin(&ts->idle_sleeptime_seq);
-
- if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE) && compute_delta) {
- if (now > ts->idle_entrytime)
- delta = ktime_sub(now, ts->idle_entrytime);
- }
-
- idle = ktime_add(cpustat[idx], delta);
- } while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq));
-
- return ktime_to_us(idle);
-
-}
-
-/**
- * get_cpu_idle_time_us - get the total idle time of a CPU
- * @cpu: CPU number to query
- * @last_update_time: variable to store update time in. Do not update
- * counters if NULL.
- *
- * Return the cumulative idle time (since boot) for a given
- * CPU, in microseconds. Note that this is partially broken due to
- * the counter of iowait tasks that can be remotely updated without
- * any synchronization. Therefore it is possible to observe backward
- * values within two consecutive reads.
- *
- * This time is measured via accounting rather than sampling,
- * and is as accurate as ktime_get() is.
- *
- * Return: -1 if generic vtime is enabled, else total idle time of the @cpu
- */
-u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
-{
- return get_cpu_sleep_time_us(cpu, CPUTIME_IDLE,
- !nr_iowait_cpu(cpu), last_update_time);
-}
-EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
-
-/**
- * get_cpu_iowait_time_us - get the total iowait time of a CPU
- * @cpu: CPU number to query
- * @last_update_time: variable to store update time in. Do not update
- * counters if NULL.
- *
- * Return the cumulative iowait time (since boot) for a given
- * CPU, in microseconds. Note this is partially broken due to
- * the counter of iowait tasks that can be remotely updated without
- * any synchronization. Therefore it is possible to observe backward
- * values within two consecutive reads.
- *
- * This time is measured via accounting rather than sampling,
- * and is as accurate as ktime_get() is.
- *
- * Return: -1 if generic vtime is enabled, else total iowait time of @cpu
- */
-u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
-{
- return get_cpu_sleep_time_us(cpu, CPUTIME_IOWAIT,
- nr_iowait_cpu(cpu), last_update_time);
-}
-EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
-
/* Simplified variant of hrtimer_forward_now() */
static ktime_t tick_forward_now(ktime_t expires, ktime_t now)
{
tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
}
+/*
+ * Set TS_FLAG_IDLE_ACTIVE on @ts and call sched_clock_idle_sleep_event().
+ * Idle time accounting itself is not done here; the call sites in this
+ * patch invoke the kcpustat_*() helpers separately.
+ */
+static void tick_nohz_clock_sleep(struct tick_sched *ts)
+{
+ tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE);
+ sched_clock_idle_sleep_event();
+}
+
+/*
+ * Undo tick_nohz_clock_sleep(): if TS_FLAG_IDLE_ACTIVE is set on @ts, clear
+ * it and call sched_clock_idle_wakeup_event(). Does nothing when the flag
+ * is already clear.
+ */
+static void tick_nohz_clock_wakeup(struct tick_sched *ts)
+{
+ if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)) {
+ tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE);
+ sched_clock_idle_wakeup_event();
+ }
+}
+
/**
* tick_nohz_idle_enter - prepare for entering idle on the current CPU
*
local_irq_disable();
ts = this_cpu_ptr(&tick_cpu_sched);
-
WARN_ON_ONCE(ts->timer_expires_base);
-
tick_sched_flag_set(ts, TS_FLAG_INIDLE);
- kcpustat_dyntick_start();
- tick_nohz_start_idle(ts);
+ ts->idle_entrytime = ktime_get();
+ kcpustat_dyntick_start(ts->idle_entrytime);
+ tick_nohz_clock_sleep(ts);
local_irq_enable();
}
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- if (tick_sched_flag_test(ts, TS_FLAG_INIDLE))
- tick_nohz_start_idle(ts);
- else
+ if (tick_sched_flag_test(ts, TS_FLAG_INIDLE)) {
+ ts->idle_entrytime = ktime_get();
+ kcpustat_irq_exit(ts->idle_entrytime);
+ tick_nohz_clock_sleep(ts);
+ } else {
tick_nohz_full_update_tick(ts);
+ }
}
/**
now = ktime_get();
if (idle_active)
- tick_nohz_stop_idle(ts, now);
+ tick_nohz_clock_wakeup(ts);
if (tick_stopped)
tick_nohz_idle_update_tick(ts, now);
- kcpustat_dyntick_stop();
+ kcpustat_dyntick_stop(now);
local_irq_enable();
}
if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED | TS_FLAG_IDLE_ACTIVE))
return;
+
now = ktime_get();
- if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE))
- tick_nohz_stop_idle(ts, now);
+
+ if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)) {
+ tick_nohz_clock_wakeup(ts);
+ kcpustat_irq_enter(now);
+ }
+
/*
* If all CPUs are idle we may need to update a stale jiffies value.
* Note nohz_full is a special case: a timekeeper is guaranteed to stay