]>
Commit | Line | Data |
---|---|---|
0332c2d4 ME |
1 | /* |
2 | * pseries CPU Hotplug infrastructure. | |
3 | * | |
413f7c40 ME |
4 | * Split out from arch/powerpc/platforms/pseries/setup.c, |
5 | * arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c | |
0332c2d4 ME |
6 | * |
7 | * Peter Bergner, IBM March 2001. | |
8 | * Copyright (C) 2001 IBM. | |
413f7c40 ME |
9 | * Dave Engebretsen, Peter Bergner, and |
10 | * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com | |
11 | * Plus various changes from other IBM teams... | |
0332c2d4 ME |
12 | * |
13 | * Copyright (C) 2006 Michael Ellerman, IBM Corporation | |
14 | * | |
15 | * This program is free software; you can redistribute it and/or | |
16 | * modify it under the terms of the GNU General Public License | |
17 | * as published by the Free Software Foundation; either version | |
18 | * 2 of the License, or (at your option) any later version. | |
19 | */ | |
20 | ||
21 | #include <linux/kernel.h> | |
22 | #include <linux/delay.h> | |
23 | #include <linux/cpu.h> | |
24 | #include <asm/system.h> | |
25 | #include <asm/prom.h> | |
26 | #include <asm/rtas.h> | |
27 | #include <asm/firmware.h> | |
28 | #include <asm/machdep.h> | |
29 | #include <asm/vdso_datapage.h> | |
30 | #include <asm/pSeries_reconfig.h> | |
31 | #include "xics.h" | |
473980a9 | 32 | #include "plpar_wrappers.h" |
3aa565f5 | 33 | #include "offline_states.h" |
0332c2d4 ME |
34 | |
35 | /* This version can't take the spinlock, because it never returns */ | |
36 | static struct rtas_args rtas_stop_self_args = { | |
37 | .token = RTAS_UNKNOWN_SERVICE, | |
38 | .nargs = 0, | |
39 | .nret = 1, | |
40 | .rets = &rtas_stop_self_args.args[0], | |
41 | }; | |
42 | ||
3aa565f5 GS |
43 | static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = |
44 | CPU_STATE_OFFLINE; | |
45 | static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; | |
46 | ||
47 | static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; | |
48 | ||
49 | static int cede_offline_enabled __read_mostly = 1; | |
50 | ||
51 | /* | |
52 | * Enable/disable cede_offline when available. | |
53 | */ | |
54 | static int __init setup_cede_offline(char *str) | |
55 | { | |
56 | if (!strcmp(str, "off")) | |
57 | cede_offline_enabled = 0; | |
58 | else if (!strcmp(str, "on")) | |
59 | cede_offline_enabled = 1; | |
60 | else | |
61 | return 0; | |
62 | return 1; | |
63 | } | |
64 | ||
65 | __setup("cede_offline=", setup_cede_offline); | |
66 | ||
67 | enum cpu_state_vals get_cpu_current_state(int cpu) | |
68 | { | |
69 | return per_cpu(current_state, cpu); | |
70 | } | |
71 | ||
72 | void set_cpu_current_state(int cpu, enum cpu_state_vals state) | |
73 | { | |
74 | per_cpu(current_state, cpu) = state; | |
75 | } | |
76 | ||
77 | enum cpu_state_vals get_preferred_offline_state(int cpu) | |
78 | { | |
79 | return per_cpu(preferred_offline_state, cpu); | |
80 | } | |
81 | ||
82 | void set_preferred_offline_state(int cpu, enum cpu_state_vals state) | |
83 | { | |
84 | per_cpu(preferred_offline_state, cpu) = state; | |
85 | } | |
86 | ||
87 | void set_default_offline_state(int cpu) | |
88 | { | |
89 | per_cpu(preferred_offline_state, cpu) = default_offline_state; | |
90 | } | |
91 | ||
04da6af9 | 92 | static void rtas_stop_self(void) |
0332c2d4 ME |
93 | { |
94 | struct rtas_args *args = &rtas_stop_self_args; | |
95 | ||
96 | local_irq_disable(); | |
97 | ||
98 | BUG_ON(args->token == RTAS_UNKNOWN_SERVICE); | |
99 | ||
100 | printk("cpu %u (hwid %u) Ready to die...\n", | |
101 | smp_processor_id(), hard_smp_processor_id()); | |
102 | enter_rtas(__pa(args)); | |
103 | ||
104 | panic("Alas, I survived.\n"); | |
105 | } | |
106 | ||
06ba30b6 | 107 | static void pseries_mach_cpu_die(void) |
04da6af9 | 108 | { |
3aa565f5 GS |
109 | unsigned int cpu = smp_processor_id(); |
110 | unsigned int hwcpu = hard_smp_processor_id(); | |
111 | u8 cede_latency_hint = 0; | |
112 | ||
04da6af9 ME |
113 | local_irq_disable(); |
114 | idle_task_exit(); | |
c3e8506c | 115 | xics_teardown_cpu(); |
3aa565f5 GS |
116 | |
117 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { | |
118 | set_cpu_current_state(cpu, CPU_STATE_INACTIVE); | |
32d8ad4e BK |
119 | if (ppc_md.suspend_disable_cpu) |
120 | ppc_md.suspend_disable_cpu(); | |
121 | ||
3aa565f5 GS |
122 | cede_latency_hint = 2; |
123 | ||
124 | get_lppaca()->idle = 1; | |
125 | if (!get_lppaca()->shared_proc) | |
126 | get_lppaca()->donate_dedicated_cpu = 1; | |
127 | ||
3aa565f5 GS |
128 | while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
129 | extended_cede_processor(cede_latency_hint); | |
3aa565f5 GS |
130 | } |
131 | ||
3aa565f5 GS |
132 | if (!get_lppaca()->shared_proc) |
133 | get_lppaca()->donate_dedicated_cpu = 0; | |
134 | get_lppaca()->idle = 0; | |
3aa565f5 | 135 | |
0212f260 VS |
136 | if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { |
137 | unregister_slb_shadow(hwcpu, __pa(get_slb_shadow())); | |
3aa565f5 | 138 | |
0212f260 VS |
139 | /* |
140 | * Call to start_secondary_resume() will not return. | |
141 | * Kernel stack will be reset and start_secondary() | |
142 | * will be called to continue the online operation. | |
143 | */ | |
144 | start_secondary_resume(); | |
145 | } | |
146 | } | |
3aa565f5 | 147 | |
0212f260 VS |
148 | /* Requested state is CPU_STATE_OFFLINE at this point */ |
149 | WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE); | |
3aa565f5 | 150 | |
0212f260 VS |
151 | set_cpu_current_state(cpu, CPU_STATE_OFFLINE); |
152 | unregister_slb_shadow(hwcpu, __pa(get_slb_shadow())); | |
153 | rtas_stop_self(); | |
3aa565f5 | 154 | |
04da6af9 ME |
155 | /* Should never get here... */ |
156 | BUG(); | |
157 | for(;;); | |
158 | } | |
159 | ||
06ba30b6 | 160 | static int pseries_cpu_disable(void) |
413f7c40 ME |
161 | { |
162 | int cpu = smp_processor_id(); | |
163 | ||
ea0f1cab | 164 | set_cpu_online(cpu, false); |
413f7c40 ME |
165 | vdso_data->processorCount--; |
166 | ||
167 | /*fix boot_cpuid here*/ | |
168 | if (cpu == boot_cpuid) | |
8729faaa | 169 | boot_cpuid = cpumask_any(cpu_online_mask); |
413f7c40 ME |
170 | |
171 | /* FIXME: abstract this to not be platform specific later on */ | |
172 | xics_migrate_irqs_away(); | |
173 | return 0; | |
174 | } | |
175 | ||
3aa565f5 GS |
176 | /* |
177 | * pseries_cpu_die: Wait for the cpu to die. | |
178 | * @cpu: logical processor id of the CPU whose death we're awaiting. | |
179 | * | |
180 | * This function is called from the context of the thread which is performing | |
181 | * the cpu-offline. Here we wait for long enough to allow the cpu in question | |
182 | * to self-destroy so that the cpu-offline thread can send the CPU_DEAD | |
183 | * notifications. | |
184 | * | |
185 | * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to | |
186 | * self-destruct. | |
187 | */ | |
06ba30b6 | 188 | static void pseries_cpu_die(unsigned int cpu) |
413f7c40 ME |
189 | { |
190 | int tries; | |
3aa565f5 | 191 | int cpu_status = 1; |
413f7c40 ME |
192 | unsigned int pcpu = get_hard_smp_processor_id(cpu); |
193 | ||
3aa565f5 GS |
194 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
195 | cpu_status = 1; | |
940ce422 | 196 | for (tries = 0; tries < 5000; tries++) { |
3aa565f5 GS |
197 | if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { |
198 | cpu_status = 0; | |
199 | break; | |
200 | } | |
940ce422 | 201 | msleep(1); |
3aa565f5 GS |
202 | } |
203 | } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { | |
204 | ||
205 | for (tries = 0; tries < 25; tries++) { | |
f8b67691 MN |
206 | cpu_status = smp_query_cpu_stopped(pcpu); |
207 | if (cpu_status == QCSS_STOPPED || | |
208 | cpu_status == QCSS_HARDWARE_ERROR) | |
3aa565f5 GS |
209 | break; |
210 | cpu_relax(); | |
211 | } | |
413f7c40 | 212 | } |
3aa565f5 | 213 | |
413f7c40 ME |
214 | if (cpu_status != 0) { |
215 | printk("Querying DEAD? cpu %i (%i) shows %i\n", | |
216 | cpu, pcpu, cpu_status); | |
217 | } | |
218 | ||
25985edc | 219 | /* Isolation and deallocation are definitely done by |
413f7c40 ME |
220 | * drslot_chrp_cpu. If they were not they would be |
221 | * done here. Change isolate state to Isolate and | |
222 | * change allocation-state to Unusable. | |
223 | */ | |
224 | paca[cpu].cpu_start = 0; | |
225 | } | |
226 | ||
227 | /* | |
828a6986 | 228 | * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle |
413f7c40 ME |
229 | * here is that a cpu device node may represent up to two logical cpus |
230 | * in the SMT case. We must honor the assumption in other code that | |
231 | * the logical ids for sibling SMT threads x and y are adjacent, such | |
232 | * that x^1 == y and y^1 == x. | |
233 | */ | |
06ba30b6 | 234 | static int pseries_add_processor(struct device_node *np) |
413f7c40 ME |
235 | { |
236 | unsigned int cpu; | |
8729faaa | 237 | cpumask_var_t candidate_mask, tmp; |
413f7c40 ME |
238 | int err = -ENOSPC, len, nthreads, i; |
239 | const u32 *intserv; | |
240 | ||
e2eb6392 | 241 | intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); |
413f7c40 ME |
242 | if (!intserv) |
243 | return 0; | |
244 | ||
8729faaa AB |
245 | zalloc_cpumask_var(&candidate_mask, GFP_KERNEL); |
246 | zalloc_cpumask_var(&tmp, GFP_KERNEL); | |
247 | ||
413f7c40 ME |
248 | nthreads = len / sizeof(u32); |
249 | for (i = 0; i < nthreads; i++) | |
8729faaa | 250 | cpumask_set_cpu(i, tmp); |
413f7c40 | 251 | |
86ef5c9a | 252 | cpu_maps_update_begin(); |
413f7c40 | 253 | |
8729faaa | 254 | BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); |
413f7c40 ME |
255 | |
256 | /* Get a bitmap of unoccupied slots. */ | |
8729faaa AB |
257 | cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); |
258 | if (cpumask_empty(candidate_mask)) { | |
413f7c40 ME |
259 | /* If we get here, it most likely means that NR_CPUS is |
260 | * less than the partition's max processors setting. | |
261 | */ | |
262 | printk(KERN_ERR "Cannot add cpu %s; this system configuration" | |
263 | " supports %d logical cpus.\n", np->full_name, | |
8729faaa | 264 | cpumask_weight(cpu_possible_mask)); |
413f7c40 ME |
265 | goto out_unlock; |
266 | } | |
267 | ||
8729faaa AB |
268 | while (!cpumask_empty(tmp)) |
269 | if (cpumask_subset(tmp, candidate_mask)) | |
413f7c40 ME |
270 | /* Found a range where we can insert the new cpu(s) */ |
271 | break; | |
272 | else | |
8729faaa | 273 | cpumask_shift_left(tmp, tmp, nthreads); |
413f7c40 | 274 | |
8729faaa | 275 | if (cpumask_empty(tmp)) { |
828a6986 | 276 | printk(KERN_ERR "Unable to find space in cpu_present_mask for" |
413f7c40 ME |
277 | " processor %s with %d thread(s)\n", np->name, |
278 | nthreads); | |
279 | goto out_unlock; | |
280 | } | |
281 | ||
8729faaa AB |
282 | for_each_cpu(cpu, tmp) { |
283 | BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask)); | |
ea0f1cab | 284 | set_cpu_present(cpu, true); |
413f7c40 ME |
285 | set_hard_smp_processor_id(cpu, *intserv++); |
286 | } | |
287 | err = 0; | |
288 | out_unlock: | |
86ef5c9a | 289 | cpu_maps_update_done(); |
8729faaa AB |
290 | free_cpumask_var(candidate_mask); |
291 | free_cpumask_var(tmp); | |
413f7c40 ME |
292 | return err; |
293 | } | |
294 | ||
295 | /* | |
296 | * Update the present map for a cpu node which is going away, and set | |
297 | * the hard id in the paca(s) to -1 to be consistent with boot time | |
298 | * convention for non-present cpus. | |
299 | */ | |
06ba30b6 | 300 | static void pseries_remove_processor(struct device_node *np) |
413f7c40 ME |
301 | { |
302 | unsigned int cpu; | |
303 | int len, nthreads, i; | |
304 | const u32 *intserv; | |
305 | ||
e2eb6392 | 306 | intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); |
413f7c40 ME |
307 | if (!intserv) |
308 | return; | |
309 | ||
310 | nthreads = len / sizeof(u32); | |
311 | ||
86ef5c9a | 312 | cpu_maps_update_begin(); |
413f7c40 ME |
313 | for (i = 0; i < nthreads; i++) { |
314 | for_each_present_cpu(cpu) { | |
315 | if (get_hard_smp_processor_id(cpu) != intserv[i]) | |
316 | continue; | |
317 | BUG_ON(cpu_online(cpu)); | |
ea0f1cab | 318 | set_cpu_present(cpu, false); |
413f7c40 ME |
319 | set_hard_smp_processor_id(cpu, -1); |
320 | break; | |
321 | } | |
8729faaa | 322 | if (cpu >= nr_cpu_ids) |
413f7c40 ME |
323 | printk(KERN_WARNING "Could not find cpu to remove " |
324 | "with physical id 0x%x\n", intserv[i]); | |
325 | } | |
86ef5c9a | 326 | cpu_maps_update_done(); |
413f7c40 ME |
327 | } |
328 | ||
06ba30b6 ME |
329 | static int pseries_smp_notifier(struct notifier_block *nb, |
330 | unsigned long action, void *node) | |
413f7c40 ME |
331 | { |
332 | int err = NOTIFY_OK; | |
333 | ||
334 | switch (action) { | |
335 | case PSERIES_RECONFIG_ADD: | |
06ba30b6 | 336 | if (pseries_add_processor(node)) |
413f7c40 ME |
337 | err = NOTIFY_BAD; |
338 | break; | |
339 | case PSERIES_RECONFIG_REMOVE: | |
06ba30b6 | 340 | pseries_remove_processor(node); |
413f7c40 ME |
341 | break; |
342 | default: | |
343 | err = NOTIFY_DONE; | |
344 | break; | |
345 | } | |
346 | return err; | |
347 | } | |
348 | ||
06ba30b6 ME |
349 | static struct notifier_block pseries_smp_nb = { |
350 | .notifier_call = pseries_smp_notifier, | |
413f7c40 ME |
351 | }; |
352 | ||
3aa565f5 GS |
/* Sizing of the buffer that receives the cede latency system parameter. */
#define MAX_CEDE_LATENCY_LEVELS		4
#define CEDE_LATENCY_PARAM_LENGTH	10
#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
/* Parameter number passed to "ibm,get-system-parameter". */
#define CEDE_LATENCY_TOKEN		45

static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];

/*
 * Zero cede_parameters and ask RTAS ("ibm,get-system-parameter") to fill
 * it with system parameter CEDE_LATENCY_TOKEN.
 *
 * Returns the status of the rtas_call(); the caller treats 0 as success.
 * Despite the name, the buffer contents are not interpreted here.
 */
static int parse_cede_parameters(void)
{
	int token;

	memset(cede_parameters, 0, sizeof(cede_parameters));

	token = rtas_token("ibm,get-system-parameter");

	return rtas_call(token, 3, 1, NULL, CEDE_LATENCY_TOKEN,
			 __pa(cede_parameters), sizeof(cede_parameters));
}
370 | ||
0332c2d4 ME |
371 | static int __init pseries_cpu_hotplug_init(void) |
372 | { | |
64f27585 OJ |
373 | struct device_node *np; |
374 | const char *typep; | |
3aa565f5 | 375 | int cpu; |
f8b67691 | 376 | int qcss_tok; |
64f27585 OJ |
377 | |
378 | for_each_node_by_name(np, "interrupt-controller") { | |
379 | typep = of_get_property(np, "compatible", NULL); | |
380 | if (strstr(typep, "open-pic")) { | |
381 | of_node_put(np); | |
382 | ||
383 | printk(KERN_INFO "CPU Hotplug not supported on " | |
384 | "systems using MPIC\n"); | |
385 | return 0; | |
386 | } | |
387 | } | |
388 | ||
0332c2d4 | 389 | rtas_stop_self_args.token = rtas_token("stop-self"); |
674fa677 | 390 | qcss_tok = rtas_token("query-cpu-stopped-state"); |
0332c2d4 | 391 | |
674fa677 ME |
392 | if (rtas_stop_self_args.token == RTAS_UNKNOWN_SERVICE || |
393 | qcss_tok == RTAS_UNKNOWN_SERVICE) { | |
394 | printk(KERN_INFO "CPU Hotplug not supported by firmware " | |
395 | "- disabling.\n"); | |
396 | return 0; | |
397 | } | |
04da6af9 | 398 | |
06ba30b6 ME |
399 | ppc_md.cpu_die = pseries_mach_cpu_die; |
400 | smp_ops->cpu_disable = pseries_cpu_disable; | |
401 | smp_ops->cpu_die = pseries_cpu_die; | |
413f7c40 ME |
402 | |
403 | /* Processors can be added/removed only on LPAR */ | |
3aa565f5 | 404 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
06ba30b6 | 405 | pSeries_reconfig_notifier_register(&pseries_smp_nb); |
3aa565f5 GS |
406 | cpu_maps_update_begin(); |
407 | if (cede_offline_enabled && parse_cede_parameters() == 0) { | |
408 | default_offline_state = CPU_STATE_INACTIVE; | |
409 | for_each_online_cpu(cpu) | |
410 | set_default_offline_state(cpu); | |
411 | } | |
412 | cpu_maps_update_done(); | |
413 | } | |
413f7c40 | 414 | |
0332c2d4 ME |
415 | return 0; |
416 | } | |
417 | arch_initcall(pseries_cpu_hotplug_init); |