/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <asm/fpu/internal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
#include <asm/traps.h>

#include <linux/hardirq.h>
#include <linux/pkeys.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/fpu.h>

/*
 * Represents the initial FPU state. It's mostly (but not completely) zeroes,
 * depending on the FPU hardware format:
 */
union fpregs_state init_fpstate __read_mostly;

/*
 * Track whether the kernel is using the FPU state
 * currently.
 *
 * This flag is used:
 *
 *   - by IRQ context code to potentially use the FPU
 *     if it's unused.
 *
 *   - to debug kernel_fpu_begin()/end() correctness
 */
static DEFINE_PER_CPU(bool, in_kernel_fpu);

/*
 * Track which context is using the FPU on the CPU:
 */
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

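/*
 * Flag this CPU as busy running kernel-FPU code; the WARN_ON_FPU()
 * catches unbalanced or nested kernel_fpu_begin() sections:
 */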
static void kernel_fpu_disable(void)
{
	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
	this_cpu_write(in_kernel_fpu, true);
}

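/* Clear the busy flag again; warn if we were not inside such a section: */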
static void kernel_fpu_enable(void)
{
	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
	this_cpu_write(in_kernel_fpu, false);
}

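/* Is this CPU currently inside a kernel_fpu_begin()/end() section? */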
static bool kernel_fpu_disabled(void)
{
	return this_cpu_read(in_kernel_fpu);
}

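/*
 * An interrupt may use the FPU in kernel mode as long as the context it
 * interrupted was not itself inside a kernel_fpu_begin()/end() section:
 */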
static bool interrupted_kernel_fpu_idle(void)
{
	return !kernel_fpu_disabled();
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static bool interrupted_user_mode(void)
{
	struct pt_regs *regs = get_irq_regs();
	return regs && user_mode(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (ie "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
	return !in_interrupt() ||
		interrupted_user_mode() ||
		interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);

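/*
 * Save the current task's FPU registers (if any are live) and mark this
 * CPU as executing kernel-mode FPU code. Callers must keep preemption
 * disabled across the section and pair this with __kernel_fpu_end().
 */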
void __kernel_fpu_begin(void)
{
	struct fpu *fpu = &current->thread.fpu;

	WARN_ON_FPU(!irq_fpu_usable());

	kernel_fpu_disable();

	if (fpu->initialized) {
		/*
		 * Ignore return value -- we don't care if reg state
		 * is clobbered.
		 */
		copy_fpregs_to_fpstate(fpu);
	} else {
		__cpu_invalidate_fpregs_state();
	}
}
EXPORT_SYMBOL(__kernel_fpu_begin);

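/*
 * Restore the task's FPU registers saved by __kernel_fpu_begin() and
 * clear the in-kernel-FPU marker again.
 */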
void __kernel_fpu_end(void)
{
	struct fpu *fpu = &current->thread.fpu;

	if (fpu->initialized)
		copy_kernel_to_fpregs(&fpu->state);

	kernel_fpu_enable();
}
EXPORT_SYMBOL(__kernel_fpu_end);

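/*
 * Preemption-safe wrappers around __kernel_fpu_begin()/__kernel_fpu_end():
 * preemption stays disabled for the whole kernel-mode FPU section.
 */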
void kernel_fpu_begin(void)
{
	preempt_disable();
	__kernel_fpu_begin();
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);

void kernel_fpu_end(void)
{
	__kernel_fpu_end();
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);

/*
 * Save the FPU state (mark it for reload if necessary):
 *
 * This only ever gets called for the current task.
 */
void fpu__save(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu);

	preempt_disable();
	trace_x86_fpu_before_save(fpu);
	if (fpu->initialized) {
		if (!copy_fpregs_to_fpstate(fpu)) {
			copy_kernel_to_fpregs(&fpu->state);
		}
	}
	trace_x86_fpu_after_save(fpu);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(fpu__save);

/*
 * Legacy x87 fpstate state init:
 */
static inline void fpstate_init_fstate(struct fregs_state *fp)
{
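	/*
	 * All x87 exceptions masked, extended precision, round to nearest,
	 * all registers tagged empty:
	 */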
	fp->cwd = 0xffff037fu;
	fp->swd = 0xffff0000u;
	fp->twd = 0xffffffffu;
	fp->fos = 0xffff0000u;
}

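/*
 * Set up the init values in an in-memory FPU state area, in whatever
 * format the CPU provides: soft-FPU emulation state if there is no FPU,
 * otherwise the xstate header (on XSAVES systems) plus the legacy
 * FXSAVE or FNSAVE area:
 */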
void fpstate_init(union fpregs_state *state)
{
	if (!static_cpu_has(X86_FEATURE_FPU)) {
		fpstate_init_soft(&state->soft);
		return;
	}

	memset(state, 0, fpu_kernel_xstate_size);

	if (static_cpu_has(X86_FEATURE_XSAVES))
		fpstate_init_xstate(&state->xsave);
	if (static_cpu_has(X86_FEATURE_FXSR))
		fpstate_init_fxstate(&state->fxsave);
	else
		fpstate_init_fstate(&state->fsave);
}
EXPORT_SYMBOL_GPL(fpstate_init);

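/*
 * Called at fork time: copy the current task's FPU state (src_fpu) into
 * the newly created child's fpstate (dst_fpu), saving the live registers
 * straight into the child context where the hardware allows it:
 */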
int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
	dst_fpu->last_cpu = -1;

	if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
		return 0;

	WARN_ON_FPU(src_fpu != &current->thread.fpu);

	/*
	 * Don't let 'init optimized' areas of the XSAVE area
	 * leak into the child task:
	 */
	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);

	/*
	 * Save current FPU registers directly into the child
	 * FPU context, without any memory-to-memory copying.
	 *
	 * ( The function 'fails' in the FNSAVE case, which destroys
	 *   register contents so we have to copy them back. )
	 */
	if (!copy_fpregs_to_fpstate(dst_fpu)) {
		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
		copy_kernel_to_fpregs(&src_fpu->state);
	}

	trace_x86_fpu_copy_src(src_fpu);
	trace_x86_fpu_copy_dst(dst_fpu);

	return 0;
}

/*
 * Activate the current task's in-memory FPU context,
 * if it has not been used before:
 */
void fpu__activate_curr(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu);

	if (!fpu->initialized) {
		fpstate_init(&fpu->state);
		trace_x86_fpu_init_state(fpu);

		trace_x86_fpu_activate_state(fpu);
		/* Safe to do for the current task: */
		fpu->initialized = 1;
	}
}
EXPORT_SYMBOL_GPL(fpu__activate_curr);

/*
 * This function must be called before we read a task's fpstate.
 *
 * There are two cases where this gets called:
 *
 * - for the current task (when coredumping), in which case we have
 *   to save the latest FPU registers into the fpstate,
 *
 * - or it's called for stopped tasks (ptrace), in which case the
 *   registers were already saved by the context-switch code when
 *   the task scheduled out - we only have to initialize the registers
 *   if they've never been initialized.
 *
 * If the task has used the FPU before then save it.
 */
void fpu__activate_fpstate_read(struct fpu *fpu)
{
	if (fpu == &current->thread.fpu) {
		fpu__save(fpu);
	} else {
		if (!fpu->initialized) {
			fpstate_init(&fpu->state);
			trace_x86_fpu_init_state(fpu);

			trace_x86_fpu_activate_state(fpu);
			/* Safe to do for current and for stopped child tasks: */
			fpu->initialized = 1;
		}
	}
}

/*
 * This function must be called before we write a task's fpstate.
 *
 * If the task has used the FPU before then invalidate any cached FPU registers.
 * If the task has not used the FPU before then initialize its fpstate.
 *
 * After this function call, after registers in the fpstate are
 * modified and the child task has woken up, the child task will
 * restore the modified FPU state from the modified context. If we
 * didn't clear its cached status here then the cached in-registers
 * state pending on its former CPU could be restored, corrupting
 * the modifications.
 */
void fpu__activate_fpstate_write(struct fpu *fpu)
{
	/*
	 * Only stopped child tasks can be used to modify the FPU
	 * state in the fpstate buffer:
	 */
	WARN_ON_FPU(fpu == &current->thread.fpu);

	if (fpu->initialized) {
		/* Invalidate any cached state: */
		__fpu_invalidate_fpregs_state(fpu);
	} else {
		fpstate_init(&fpu->state);
		trace_x86_fpu_init_state(fpu);

		trace_x86_fpu_activate_state(fpu);
		/* Safe to do for stopped child tasks: */
		fpu->initialized = 1;
	}
}

/*
 * 'fpu__restore()' is called to copy FPU registers from
 * the FPU fpstate to the live hw registers and to activate
 * access to the hardware registers, so that FPU instructions
 * can be used afterwards.
 *
 * Must be called with kernel preemption disabled (for example
 * with local interrupts disabled, as it is in the case of
 * do_device_not_available()).
 */
void fpu__restore(struct fpu *fpu)
{
	fpu__activate_curr(fpu);

	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
	kernel_fpu_disable();
	trace_x86_fpu_before_restore(fpu);
	fpregs_activate(fpu);
	copy_kernel_to_fpregs(&fpu->state);
	trace_x86_fpu_after_restore(fpu);
	kernel_fpu_enable();
}
EXPORT_SYMBOL_GPL(fpu__restore);

/*
 * Drops current FPU state: deactivates the fpregs and
 * the fpstate. NOTE: it still leaves previous contents
 * in the fpregs in the eager-FPU case.
 *
 * This function can be used in cases where we know that
 * a state-restore is coming: either an explicit one,
 * or a reschedule.
 */
void fpu__drop(struct fpu *fpu)
{
	preempt_disable();

	if (fpu == &current->thread.fpu) {
		if (fpu->initialized) {
			/* Ignore delayed exceptions from user space */
			asm volatile("1: fwait\n"
				     "2:\n"
				     _ASM_EXTABLE(1b, 2b));
			fpregs_deactivate(fpu);
		}
	}

	fpu->initialized = 0;

	trace_x86_fpu_dropped(fpu);

	preempt_enable();
}

/*
 * Clear FPU registers by setting them up from
 * the init fpstate:
 */
static inline void copy_init_fpstate_to_fpregs(void)
{
	if (use_xsave())
		copy_kernel_to_xregs(&init_fpstate.xsave, -1);
	else if (static_cpu_has(X86_FEATURE_FXSR))
		copy_kernel_to_fxregs(&init_fpstate.fxsave);
	else
		copy_kernel_to_fregs(&init_fpstate.fsave);

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		copy_init_pkru_to_fpregs();
}

/*
 * Clear the FPU state back to init state.
 *
 * Called by sys_execve(), by the signal handler code and by various
 * error paths.
 */
void fpu__clear(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */

	fpu__drop(fpu);

	/*
	 * Make sure fpstate is cleared and initialized.
	 */
	if (static_cpu_has(X86_FEATURE_FPU)) {
		preempt_disable();
		fpu__activate_curr(fpu);
		user_fpu_begin();
		copy_init_fpstate_to_fpregs();
		preempt_enable();
	}
}

/*
 * x87 math exception handling:
 */

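/*
 * Map an x87 (#MF) or SIMD (#XF) exception to the FPE_* si_code that
 * should be reported to user space, or 0 if the trap looks spurious:
 */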
int fpu__exception_code(struct fpu *fpu, int trap_nr)
{
	int err;

	if (trap_nr == X86_TRAP_MF) {
		unsigned short cwd, swd;
		/*
		 * (~cwd & swd) will mask out exceptions that are not set to unmasked
		 * status. 0x3f is the exception bits in these regs, 0x200 is the
		 * C1 reg you need in case of a stack fault, 0x040 is the stack
		 * fault bit. We should only be taking one exception at a time,
		 * so if this combination doesn't produce any single exception,
		 * then we have a bad program that isn't synchronizing its FPU usage
		 * and it will suffer the consequences since we won't be able to
		 * fully reproduce the context of the exception.
		 */
		if (boot_cpu_has(X86_FEATURE_FXSR)) {
			cwd = fpu->state.fxsave.cwd;
			swd = fpu->state.fxsave.swd;
		} else {
			cwd = (unsigned short)fpu->state.fsave.cwd;
			swd = (unsigned short)fpu->state.fsave.swd;
		}

		err = swd & ~cwd;
	} else {
		/*
		 * The SIMD FPU exceptions are handled a little differently, as there
		 * is only a single status/control register. Thus, to determine which
		 * unmasked exception was caught we must mask the exception mask bits
		 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
		 */
		unsigned short mxcsr = MXCSR_DEFAULT;

		if (boot_cpu_has(X86_FEATURE_XMM))
			mxcsr = fpu->state.fxsave.mxcsr;

		err = ~(mxcsr >> 7) & mxcsr;
	}

	if (err & 0x001) {	/* Invalid op */
		/*
		 * swd & 0x240 == 0x040: Stack Underflow
		 * swd & 0x240 == 0x240: Stack Overflow
		 * User must clear the SF bit (0x40) if set
		 */
		return FPE_FLTINV;
	} else if (err & 0x004) { /* Divide by Zero */
		return FPE_FLTDIV;
	} else if (err & 0x008) { /* Overflow */
		return FPE_FLTOVF;
	} else if (err & 0x012) { /* Denormal, Underflow */
		return FPE_FLTUND;
	} else if (err & 0x020) { /* Precision */
		return FPE_FLTRES;
	}

	/*
	 * If we're using IRQ 13, or supposedly even some trap
	 * X86_TRAP_MF implementations, it's possible
	 * we get a spurious trap, which is not an error.
	 */
	return 0;
}