]>
Commit | Line | Data |
---|---|---|
04fd09d4 SL |
1 | From 5371efcb8439047783bcaa0d57698dbefb68e8a2 Mon Sep 17 00:00:00 2001 |
2 | From: Russell King <rmk+kernel@armlinux.org.uk> | |
3 | Date: Tue, 10 Apr 2018 11:35:36 +0100 | |
4 | Subject: ARM: avoid Cortex-A9 livelock on tight dmb loops | |
5 | ||
6 | [ Upstream commit 5388a5b82199facacd3d7ac0d05aca6e8f902fed ] | |
7 | ||
8 | machine_crash_nonpanic_core() does this: | |
9 | ||
10 | while (1) | |
11 | cpu_relax(); | |
12 | ||
13 | because the kernel has crashed, and we have no known safe way to deal | |
14 | with the CPU. So, we place the CPU into an infinite loop which we | |
15 | expect it to never exit - at least not until the system as a whole is | |
16 | reset by some method. | |
17 | ||
18 | In the absence of erratum 754327, this code assembles to: | |
19 | ||
20 | b . | |
21 | ||
22 | In other words, an infinite loop. When erratum 754327 is enabled, | |
23 | this becomes: | |
24 | ||
25 | 1: dmb | |
26 | b 1b | |
27 | ||
28 | It has been observed that on some systems (eg, OMAP4) where, if a | |
29 | crash is triggered, the system tries to kexec into the panic kernel, | |
30 | but fails after taking the secondary CPU down - placing it into one | |
31 | of these loops. This causes the system to livelock, and the most | |
32 | noticeable effect is the system stops after issuing: | |
33 | ||
34 | Loading crashdump kernel... | |
35 | ||
36 | to the system console. | |
37 | ||
38 | The tested as working solution I came up with was to add wfe() to | |
39 | these infinite loops thusly: | |
40 | ||
41 | while (1) { | |
42 | cpu_relax(); | |
43 | wfe(); | |
44 | } | |
45 | ||
46 | which, without 754327 builds to: | |
47 | ||
48 | 1: wfe | |
49 | b 1b | |
50 | ||
51 | or with 754327 enabled: | |
52 | ||
53 | 1: dmb | |
54 | wfe | |
55 | b 1b | |
56 | ||
57 | Adding "wfe" does two things depending on the environment we're running | |
58 | under: | |
59 | - where we're running on bare metal, and the processor implements | |
60 | "wfe", it stops us spinning endlessly in a loop where we're never | |
61 | going to do any useful work. | |
62 | - if we're running in a VM, it allows the CPU to be given back to the | |
63 | hypervisor and rescheduled for other purposes (maybe a different VM) | |
64 | rather than wasting CPU cycles inside a crashed VM. | |
65 | ||
66 | However, in light of erratum 794072, Will Deacon wanted to see 10 nops | |
67 | as well - which is reasonable to cover the case where we have erratum | |
68 | 754327 enabled _and_ we have a processor that doesn't implement the | |
69 | wfe hint. | |
70 | ||
71 | So, we now end up with: | |
72 | ||
73 | 1: wfe | |
74 | b 1b | |
75 | ||
76 | when erratum 754327 is disabled, or: | |
77 | ||
78 | 1: dmb | |
79 | nop | |
80 | nop | |
81 | nop | |
82 | nop | |
83 | nop | |
84 | nop | |
85 | nop | |
86 | nop | |
87 | nop | |
88 | nop | |
89 | wfe | |
90 | b 1b | |
91 | ||
92 | when erratum 754327 is enabled. We also get the dmb + 10 nop | |
93 | sequence elsewhere in the kernel, in terminating loops. | |
94 | ||
95 | This is reasonable - it means we get the workaround for erratum | |
96 | 794072 when erratum 754327 is enabled, but still relinquish the dead | |
97 | processor - either by placing it in a lower power mode when wfe is | |
98 | implemented as such or by returning it to the hypervisor, or in the | |
99 | case where wfe is a no-op, we use the workaround specified in erratum | |
100 | 794072 to avoid the problem. | |
101 | ||
102 | These are two entirely orthogonal problems - the 10 nops address | |
103 | erratum 794072, and the wfe is an optimisation that makes the system | |
104 | more efficient when crashed either in terms of power consumption or | |
105 | by allowing the host/other VMs to make use of the CPU. | |
106 | ||
107 | I don't see any reason not to use kexec() inside a VM - it has the | |
108 | potential to provide automated recovery from a failure of the VM's | |
109 | kernel with the opportunity for saving a crashdump of the failure. | |
110 | A panic() with a reboot timeout won't do that, and reading the | |
111 | libvirt documentation, setting on_reboot to "preserve" won't either | |
112 | (the documentation states "The preserve action for an on_reboot event | |
113 | is treated as a destroy".) Surely it has to be a good thing to | |
114 | avoid having CPUs spinning inside a VM that is doing no useful | |
115 | work. | |
116 | ||
117 | Acked-by: Will Deacon <will.deacon@arm.com> | |
118 | Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> | |
119 | Signed-off-by: Sasha Levin <sashal@kernel.org> | |
120 | --- | |
121 | arch/arm/include/asm/barrier.h | 2 ++ | |
122 | arch/arm/include/asm/processor.h | 6 +++++- | |
123 | arch/arm/kernel/machine_kexec.c | 5 ++++- | |
124 | arch/arm/kernel/smp.c | 4 +++- | |
125 | arch/arm/mach-omap2/prm_common.c | 4 +++- | |
126 | 5 files changed, 17 insertions(+), 4 deletions(-) | |
127 | ||
128 | diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h | |
129 | index 69772e742a0a..83ae97c049d9 100644 | |
130 | --- a/arch/arm/include/asm/barrier.h | |
131 | +++ b/arch/arm/include/asm/barrier.h | |
132 | @@ -11,6 +11,8 @@ | |
133 | #define sev() __asm__ __volatile__ ("sev" : : : "memory") | |
134 | #define wfe() __asm__ __volatile__ ("wfe" : : : "memory") | |
135 | #define wfi() __asm__ __volatile__ ("wfi" : : : "memory") | |
136 | +#else | |
137 | +#define wfe() do { } while (0) | |
138 | #endif | |
139 | ||
140 | #if __LINUX_ARM_ARCH__ >= 7 | |
141 | diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h | |
142 | index c3d5fc124a05..768b6fe7640e 100644 | |
143 | --- a/arch/arm/include/asm/processor.h | |
144 | +++ b/arch/arm/include/asm/processor.h | |
145 | @@ -77,7 +77,11 @@ extern void release_thread(struct task_struct *); | |
146 | unsigned long get_wchan(struct task_struct *p); | |
147 | ||
148 | #if __LINUX_ARM_ARCH__ == 6 || defined(CONFIG_ARM_ERRATA_754327) | |
149 | -#define cpu_relax() smp_mb() | |
150 | +#define cpu_relax() \ | |
151 | + do { \ | |
152 | + smp_mb(); \ | |
153 | + __asm__ __volatile__("nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;"); \ | |
154 | + } while (0) | |
155 | #else | |
156 | #define cpu_relax() barrier() | |
157 | #endif | |
158 | diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c | |
159 | index c15318431986..6f77f52baf02 100644 | |
160 | --- a/arch/arm/kernel/machine_kexec.c | |
161 | +++ b/arch/arm/kernel/machine_kexec.c | |
162 | @@ -91,8 +91,11 @@ void machine_crash_nonpanic_core(void *unused) | |
163 | ||
164 | set_cpu_online(smp_processor_id(), false); | |
165 | atomic_dec(&waiting_for_crash_ipi); | |
166 | - while (1) | |
167 | + | |
168 | + while (1) { | |
169 | cpu_relax(); | |
170 | + wfe(); | |
171 | + } | |
172 | } | |
173 | ||
174 | void crash_smp_send_stop(void) | |
175 | diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c | |
176 | index 65f85737c6a2..844bb2f1ddef 100644 | |
177 | --- a/arch/arm/kernel/smp.c | |
178 | +++ b/arch/arm/kernel/smp.c | |
179 | @@ -602,8 +602,10 @@ static void ipi_cpu_stop(unsigned int cpu) | |
180 | local_fiq_disable(); | |
181 | local_irq_disable(); | |
182 | ||
183 | - while (1) | |
184 | + while (1) { | |
185 | cpu_relax(); | |
186 | + wfe(); | |
187 | + } | |
188 | } | |
189 | ||
190 | static DEFINE_PER_CPU(struct completion *, cpu_completion); | |
191 | diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c | |
192 | index 09180a59b1c9..2f215facba10 100644 | |
193 | --- a/arch/arm/mach-omap2/prm_common.c | |
194 | +++ b/arch/arm/mach-omap2/prm_common.c | |
195 | @@ -528,8 +528,10 @@ void omap_prm_reset_system(void) | |
196 | ||
197 | prm_ll_data->reset_system(); | |
198 | ||
199 | - while (1) | |
200 | + while (1) { | |
201 | cpu_relax(); | |
202 | + wfe(); | |
203 | + } | |
204 | } | |
205 | ||
206 | /** | |
207 | -- | |
208 | 2.19.1 | |
209 |