]>
Commit | Line | Data |
---|---|---|
d75e02d6 | 1 | /* |
0699e415 | 2 | * Copyright (C) 2005, 2007, 2009, 2011 Free Software Foundation, Inc. |
d75e02d6 L |
3 | * |
4 | * This file is free software; you can redistribute it and/or modify it | |
5 | * under the terms of the GNU General Public License as published by the | |
748086b7 | 6 | * Free Software Foundation; either version 3, or (at your option) any |
d75e02d6 L |
7 | * later version. |
8 | * | |
d75e02d6 L |
9 | * This file is distributed in the hope that it will be useful, but |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | * | |
748086b7 JJ |
14 | * Under Section 7 of GPL version 3, you are granted additional |
15 | * permissions described in the GCC Runtime Library Exception, version | |
16 | * 3.1, as published by the Free Software Foundation. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License and | |
19 | * a copy of the GCC Runtime Library Exception along with this program; | |
20 | * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
21 | * <http://www.gnu.org/licenses/>. | |
d75e02d6 L |
22 | */ |
23 | ||
24 | #define MXCSR_DAZ (1 << 6) /* MXCSR bit 6: enables denormals-are-zero (DAZ) mode */ | |
25 | #define MXCSR_FTZ (1 << 15) /* MXCSR bit 15: enables flush-to-zero (FTZ) mode */ | |
26 | ||
b3172cab UB |
27 | #ifndef __x86_64__ |
28 | /* All 64-bit targets have SSE and DAZ; | |
29 | only check them explicitly for 32-bit ones. */ | |
30 | #include "cpuid.h" | |
31 | #endif | |
adb7b764 | 32 | |
1b7a0148 | 33 | #if !defined __x86_64__ && defined __sun__ && defined __svr4__ |
0699e415 RO |
34 | #include <signal.h> |
35 | #include <ucontext.h> | |
36 | ||
37 | static volatile sig_atomic_t sigill_caught; | |
38 | ||
39 | static void | |
40 | sigill_hdlr (int sig __attribute((unused)), | |
41 | siginfo_t *sip __attribute__((unused)), | |
42 | ucontext_t *ucp) | |
43 | { | |
44 | sigill_caught = 1; | |
45 | /* Set PC to the instruction after the faulting one to skip over it, | |
1b7a0148 | 46 | otherwise we enter an infinite loop. 3 is the size of the movaps |
0699e415 | 47 | instruction. */ |
1b7a0148 | 48 | ucp->uc_mcontext.gregs[EIP] += 3; |
0699e415 RO |
49 | setcontext (ucp); |
50 | } | |
51 | #endif | |
52 | ||
d75e02d6 | 53 | static void __attribute__((constructor)) |
02709c7f | 54 | #ifndef __x86_64__ |
69b3f75b | 55 | /* The i386 ABI only requires 4-byte stack alignment, so this is necessary |
02709c7f JH |
56 | to make sure the fxsave struct gets correct alignment. |
57 | See PR27537 and PR28621. */ | |
58 | __attribute__ ((force_align_arg_pointer)) | |
59 | #endif | |
d75e02d6 L |
60 | set_fast_math (void) |
61 | { | |
62 | #ifndef __x86_64__ | |
d75e02d6 L |
63 | unsigned int eax, ebx, ecx, edx; |
64 | ||
b3172cab | 65 | if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) |
d75e02d6 L |
66 | return; |
67 | ||
b3172cab | 68 | if (edx & bit_SSE) |
d75e02d6 | 69 | { |
0699e415 | 70 | unsigned int mxcsr; |
adb7b764 | 71 | |
0699e415 RO |
72 | #if defined __sun__ && defined __svr4__ |
73 | /* Solaris 2 before Solaris 9 4/04 cannot execute SSE instructions even | |
74 | if the CPU supports them. Programs receive SIGILL instead, so check | |
75 | for that at runtime. */ | |
76 | struct sigaction act, oact; | |
77 | ||
78 | act.sa_handler = sigill_hdlr; | |
79 | sigemptyset (&act.sa_mask); | |
80 | /* Need to set SA_SIGINFO so a ucontext_t * is passed to the handler. */ | |
81 | act.sa_flags = SA_SIGINFO; | |
82 | sigaction (SIGILL, &act, &oact); | |
83 | ||
84 | /* We need a single SSE instruction here so the handler can safely skip | |
85 | over it. */ | |
1b7a0148 | 86 | __asm__ volatile ("movaps %xmm0,%xmm0"); |
0699e415 RO |
87 | |
88 | sigaction (SIGILL, &oact, NULL); | |
89 | ||
90 | if (sigill_caught) | |
91 | return; | |
92 | #endif /* __sun__ && __svr4__ */ | |
93 | ||
b3172cab | 94 | if (edx & bit_FXSAVE) |
adb7b764 L |
95 | { |
96 | /* Check if DAZ is available. */ | |
97 | struct | |
98 | { | |
eff6ca85 UB |
99 | unsigned short cwd; |
100 | unsigned short swd; | |
101 | unsigned short twd; | |
102 | unsigned short fop; | |
103 | unsigned int fip; | |
104 | unsigned int fcs; | |
105 | unsigned int foo; | |
106 | unsigned int fos; | |
107 | unsigned int mxcsr; | |
108 | unsigned int mxcsr_mask; | |
109 | unsigned int st_space[32]; | |
110 | unsigned int xmm_space[32]; | |
111 | unsigned int padding[56]; | |
adb7b764 L |
112 | } __attribute__ ((aligned (16))) fxsave; |
113 | ||
eff6ca85 UB |
114 | /* This is necessary since some implementations of FXSAVE |
115 | do not modify reserved areas within the image. */ | |
116 | fxsave.mxcsr_mask = 0; | |
117 | ||
118 | __builtin_ia32_fxsave (&fxsave); | |
adb7b764 | 119 | |
eff6ca85 | 120 | mxcsr = fxsave.mxcsr; |
adb7b764 L |
121 | |
122 | if (fxsave.mxcsr_mask & MXCSR_DAZ) | |
123 | mxcsr |= MXCSR_DAZ; | |
124 | } | |
eff6ca85 UB |
125 | else |
126 | mxcsr = __builtin_ia32_stmxcsr (); | |
adb7b764 | 127 | |
eff6ca85 | 128 | mxcsr |= MXCSR_FTZ; |
d75e02d6 L |
129 | __builtin_ia32_ldmxcsr (mxcsr); |
130 | } | |
adb7b764 L |
131 | #else |
132 | unsigned int mxcsr = __builtin_ia32_stmxcsr (); | |
133 | mxcsr |= MXCSR_DAZ | MXCSR_FTZ; | |
134 | __builtin_ia32_ldmxcsr (mxcsr); | |
135 | #endif | |
d75e02d6 | 136 | } |