EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
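+/* Per-CPU state for the scheduler's private PRNG, consumed via sched_rng(). */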
+DEFINE_PER_CPU(struct rnd_state, sched_rnd_state);
#ifdef CONFIG_SCHED_PROXY_EXEC
DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
void __init sched_init_smp(void)
{
sched_init_numa(NUMA_NO_NODE);
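+ /* Seed the per-CPU PRNG state exactly once. */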
+ prandom_init_once(&sched_rnd_state);
+
/*
 * There's no userspace yet to cause hotplug operations; hence all the
 * CPU masks are stable and all blatant races in the below code cannot
 * happen.
 */
max_load_balance_interval = HZ*num_online_cpus()/10;
}
-static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost)
+static inline void update_newidle_stats(struct sched_domain *sd, unsigned int success)
+{
+ sd->newidle_call++;
+ sd->newidle_success += success;
+
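+ /*
+ * Once 1024 calls have accumulated, publish the weighted success
+ * count as the ratio and halve both counters, so the estimate
+ * tracks recent behaviour rather than the whole history.
+ */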
+ if (sd->newidle_call >= 1024) {
+ sd->newidle_ratio = sd->newidle_success;
+ sd->newidle_call /= 2;
+ sd->newidle_success /= 2;
+ }
+}
+
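+/*
+ * @success is the weighted outcome of the balance attempt; a zero @cost
+ * marks a pure decay visit and leaves the statistics untouched.
+ */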
+static inline bool
+update_newidle_cost(struct sched_domain *sd, u64 cost, unsigned int success)
{
unsigned long next_decay = sd->last_decay_max_lb_cost + HZ;
unsigned long now = jiffies;
+ if (cost)
+ update_newidle_stats(sd, success);
+
if (cost > sd->max_newidle_lb_cost) {
/*
 * Track max cost of a domain to make sure to not delay the
 * next wakeup on the CPU.
 */
/*
 * Decay the newidle max times here because this is a regular
 * visit to all the domains.
 */
- need_decay = update_newidle_cost(sd, 0);
+ need_decay = update_newidle_cost(sd, 0, 0);
max_cost += sd->max_newidle_lb_cost;
break;
if (sd->flags & SD_BALANCE_NEWIDLE) {
+ unsigned int weight = 1;
+
+ if (sched_feat(NI_RANDOM)) {
+ /*
+ * Throw a 1024-sided die and only run the
+ * newidle balance according to the observed
+ * success rate.
+ */
+ u32 d1k = sched_rng() % 1024;
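+ /*
+ * The +1 keeps a non-zero chance of balancing (and a non-zero
+ * divisor below) even when the ratio has decayed to zero, so
+ * the estimate can recover.
+ */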
+ weight = 1 + sd->newidle_ratio;
+ if (d1k > weight) {
+ update_newidle_stats(sd, 0);
+ continue;
+ }
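+ /*
+ * We only get here on roughly weight out of 1024 rolls, so
+ * scale each success by ~1024/weight (rounded) to keep the
+ * ratio unbiased.
+ */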
+ weight = (1024 + weight/2) / weight;
+ }
pulled_task = sched_balance_rq(this_cpu, this_rq,
sd, CPU_NEWLY_IDLE,
&continue_balancing);
t1 = sched_clock_cpu(this_cpu);
domain_cost = t1 - t0;
- update_newidle_cost(sd, domain_cost);
-
curr_cost += domain_cost;
t0 = t1;
+
+ /*
+ * Track max cost of a domain to make sure to not delay the
+ * next wakeup on the CPU.
+ */
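+ /*
+ * A pull counts as 'weight' successes to undo the sampling done
+ * by the NI_RANDOM dice roll above.
+ */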
+ update_newidle_cost(sd, domain_cost, weight * !!pulled_task);
}
#ifndef _KERNEL_SCHED_SCHED_H
#define _KERNEL_SCHED_SCHED_H
+#include <linux/prandom.h>
#include <linux/sched/affinity.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/cpufreq.h>
}
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+DECLARE_PER_CPU(struct rnd_state, sched_rnd_state);
+
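+/*
+ * Cheap, non-cryptographic per-CPU random number for load-balancing
+ * heuristics.
+ */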
+static inline u32 sched_rng(void)
+{
+ return prandom_u32_state(this_cpu_ptr(&sched_rnd_state));
+}
#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() this_cpu_ptr(&runqueues)