1 Subject: add affinity_load_balancing sysctl
5 Add a sysctl to tweak how the kernel initially schedules threads to cpus.
7 By default the kernel tries to keep threads on the local cpu (and local
8 node on NUMA machines). Depending on the application this may not deliver
9 the best performance: applications with a large working set for
10 each thread tend to perform better when scheduled to different nodes,
11 because they can then use the caches of multiple nodes.
13 With this sysctl enabled, the kernel spreads threads over the online cpus
14 in the task's (restricted) affinity mask and does not try to keep them local.
17 - set sysctl kernel.affinity_load_balancing = 1
18 - use taskset or numactl to specify which cpus your task should be
22 kernel/sched.c | 28 ++++++++++++++++++++++++++++
23 kernel/sysctl.c | 12 ++++++++++++
24 2 files changed, 40 insertions(+)
28 @@ -2107,6 +2107,28 @@ find_idlest_cpu(struct sched_group *grou
33 +find_idlest_cpu_nodomain(struct task_struct *p, int this_cpu)
36 + unsigned long load, min_load = ULONG_MAX;
40 + /* Traverse only the allowed CPUs */
41 + cpus_and(tmp, cpu_online_map, p->cpus_allowed);
43 + for_each_cpu_mask(i, tmp) {
44 + load = target_load(i, 1);
46 + if (load < min_load) {
55 * sched_balance_self: balance the current task (running on cpu) in domains
56 * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
57 @@ -2118,11 +2140,17 @@ find_idlest_cpu(struct sched_group *grou
59 * preempt must be disabled.
62 +int affinity_load_balancing = 0;
64 static int sched_balance_self(int cpu, int flag)
66 struct task_struct *t = current;
67 struct sched_domain *tmp, *sd = NULL;
69 + if (affinity_load_balancing && !cpus_full(t->cpus_allowed))
70 + return find_idlest_cpu_nodomain(t, cpu);
72 for_each_domain(cpu, tmp) {
74 * If power savings logic is enabled for a domain, stop there.
77 @@ -237,6 +237,8 @@ static int min_wakeup_granularity_ns;
78 static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
81 +extern int affinity_load_balancing;
83 static struct ctl_table kern_table[] = {
84 #ifdef CONFIG_SCHED_DEBUG
86 @@ -872,6 +874,16 @@ static struct ctl_table kern_table[] = {
87 * NOTE: do not add new entries to this table unless you have read
88 * Documentation/sysctl/ctl_unnumbered.txt
92 + .ctl_name = CTL_UNNUMBERED,
93 + .procname = "affinity_load_balancing",
94 + .data = &affinity_load_balancing,
95 + .maxlen = sizeof(affinity_load_balancing),
97 + .proc_handler = &proc_dointvec,