Subject: add affinity_load_balancing sysctl
From: kraxel@suse.de
References: 176738

Add a sysctl to tweak how the kernel initially schedules threads to cpus.

By default the kernel tries to keep threads on the local cpu (and on the
local node on NUMA machines). Depending on the application this may not
deliver the best performance: applications with a large per-thread working
set in particular tend to perform better when their threads are spread
across nodes, because they can then use the caches of multiple nodes.

With this sysctl enabled, the kernel spreads threads over the given cpus
instead of trying to keep them local. Note that this only takes effect for
tasks whose cpu affinity mask has been restricted; tasks allowed to run on
all cpus keep the default placement policy.

usage:
 - set sysctl kernel.affinity_load_balancing = 1
 - use taskset or numactl to specify which cpus your task should be
   scheduled on (see the example below).

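For illustration, a minimal session (the program name and cpu numbers are
made up; pick cpus on different nodes for your topology):

    sysctl -w kernel.affinity_load_balancing=1
    cat /proc/sys/kernel/affinity_load_balancing    # verify: prints 1
    taskset -c 0,4,8,12 ./my_threaded_app
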
---
 kernel/sched.c  |   28 ++++++++++++++++++++++++++++
 kernel/sysctl.c |   12 ++++++++++++
 2 files changed, 40 insertions(+)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2107,6 +2107,28 @@ find_idlest_cpu(struct sched_group *grou
 	return idlest;
 }
 
+static int
+find_idlest_cpu_nodomain(struct task_struct *p, int this_cpu)
+{
+	cpumask_t tmp;
+	unsigned long load, min_load = ULONG_MAX;
+	int idlest = -1;
+	int i;
+
+	/* Traverse only the allowed CPUs */
+	cpus_and(tmp, cpu_online_map, p->cpus_allowed);
+
+	for_each_cpu_mask(i, tmp) {
+		load = target_load(i, 1);
+
+		if (load < min_load) {
+			min_load = load;
+			idlest = i;
+		}
+	}
+	return idlest;
+}
+
 /*
  * sched_balance_self: balance the current task (running on cpu) in domains
  * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
@@ -2118,11 +2140,17 @@ find_idlest_cpu(struct sched_group *grou
  *
  * preempt must be disabled.
  */
+
+int affinity_load_balancing = 0;
+
 static int sched_balance_self(int cpu, int flag)
 {
 	struct task_struct *t = current;
 	struct sched_domain *tmp, *sd = NULL;
 
+	if (affinity_load_balancing && !cpus_full(t->cpus_allowed))
+		return find_idlest_cpu_nodomain(t, cpu);
+
 	for_each_domain(cpu, tmp) {
 		/*
 		 * If power savings logic is enabled for a domain, stop there.
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -237,6 +237,8 @@ static int min_wakeup_granularity_ns;
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
 #endif
 
+extern int affinity_load_balancing;
+
 static struct ctl_table kern_table[] = {
 #ifdef CONFIG_SCHED_DEBUG
 	{
@@ -872,6 +874,16 @@ static struct ctl_table kern_table[] = {
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
  */
+#ifdef CONFIG_SMP
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "affinity_load_balancing",
+		.data		= &affinity_load_balancing,
+		.maxlen		= sizeof(affinity_load_balancing),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 