]>
Commit | Line | Data |
---|---|---|
83ffe9cd | 1 | /* Copyright (C) 2007-2023 Free Software Foundation, Inc. |
cbf2e4d4 HJ |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify | |
6 | it under the terms of the GNU General Public License as published by | |
7 | the Free Software Foundation; either version 3, or (at your option) | |
8 | any later version. | |
9 | ||
10 | GCC is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | Under Section 7 of GPL version 3, you are granted additional | |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
23 | ||
24 | #ifndef _X86INTRIN_H_INCLUDED | |
25 | # error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead." | |
26 | #endif | |
27 | ||
28 | #ifndef _FMA4INTRIN_H_INCLUDED | |
29 | #define _FMA4INTRIN_H_INCLUDED | |
30 | ||
cbf2e4d4 HJ |
31 | /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */ |
32 | #include <ammintrin.h> | |
33 | ||
97db2bf7 ST |
/* When __FMA4__ is not already defined, save the current options, switch
   the target to "fma4" and define __DISABLE_FMA4__ to record that this
   was done.  */
#if !defined (__FMA4__)
#pragma GCC push_options
#pragma GCC target("fma4")
#define __DISABLE_FMA4__
#endif /* !__FMA4__ */
39 | ||
cbf2e4d4 HJ |
/* 128-bit floating-point multiply/add type instructions.  */
41 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
42 | _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C) | |
43 | { | |
44 | return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); | |
45 | } | |
46 | ||
47 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
48 | _mm_macc_pd (__m128d __A, __m128d __B, __m128d __C) | |
49 | { | |
50 | return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); | |
51 | } | |
52 | ||
53 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
54 | _mm_macc_ss (__m128 __A, __m128 __B, __m128 __C) | |
55 | { | |
56 | return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); | |
57 | } | |
58 | ||
59 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
60 | _mm_macc_sd (__m128d __A, __m128d __B, __m128d __C) | |
61 | { | |
62 | return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C); | |
63 | } | |
64 | ||
65 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
66 | _mm_msub_ps (__m128 __A, __m128 __B, __m128 __C) | |
67 | ||
68 | { | |
89509419 | 69 | return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
cbf2e4d4 HJ |
70 | } |
71 | ||
72 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
73 | _mm_msub_pd (__m128d __A, __m128d __B, __m128d __C) | |
74 | { | |
89509419 | 75 | return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
cbf2e4d4 HJ |
76 | } |
77 | ||
78 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
79 | _mm_msub_ss (__m128 __A, __m128 __B, __m128 __C) | |
80 | { | |
89509419 | 81 | return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
cbf2e4d4 HJ |
82 | } |
83 | ||
84 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
85 | _mm_msub_sd (__m128d __A, __m128d __B, __m128d __C) | |
86 | { | |
89509419 | 87 | return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
cbf2e4d4 HJ |
88 | } |
89 | ||
90 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
91 | _mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C) | |
92 | { | |
89509419 | 93 | return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
cbf2e4d4 HJ |
94 | } |
95 | ||
96 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
97 | _mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C) | |
98 | { | |
89509419 | 99 | return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C); |
cbf2e4d4 HJ |
100 | } |
101 | ||
102 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
103 | _mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C) | |
104 | { | |
89509419 | 105 | return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
cbf2e4d4 HJ |
106 | } |
107 | ||
108 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
109 | _mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C) | |
110 | { | |
89509419 | 111 | return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C); |
cbf2e4d4 HJ |
112 | } |
113 | ||
114 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
115 | _mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C) | |
116 | { | |
89509419 | 117 | return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
cbf2e4d4 HJ |
118 | } |
119 | ||
120 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
121 | _mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C) | |
122 | { | |
89509419 | 123 | return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C); |
cbf2e4d4 HJ |
124 | } |
125 | ||
126 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
127 | _mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C) | |
128 | { | |
89509419 | 129 | return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
cbf2e4d4 HJ |
130 | } |
131 | ||
132 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
133 | _mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C) | |
134 | { | |
89509419 | 135 | return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C); |
cbf2e4d4 HJ |
136 | } |
137 | ||
138 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
139 | _mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C) | |
140 | { | |
141 | return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); | |
142 | } | |
143 | ||
144 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
145 | _mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C) | |
146 | { | |
147 | return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); | |
148 | } | |
149 | ||
150 | extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
151 | _mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C) | |
152 | { | |
89509419 | 153 | return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
cbf2e4d4 HJ |
154 | } |
155 | ||
156 | extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
157 | _mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C) | |
158 | { | |
89509419 | 159 | return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
cbf2e4d4 HJ |
160 | } |
161 | ||
/* 256-bit floating-point multiply/add type instructions.  */
163 | extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
164 | _mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C) | |
165 | { | |
166 | return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); | |
167 | } | |
168 | ||
169 | extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
170 | _mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C) | |
171 | { | |
172 | return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C); | |
173 | } | |
174 | ||
175 | extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
176 | _mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C) | |
177 | ||
178 | { | |
89509419 | 179 | return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
cbf2e4d4 HJ |
180 | } |
181 | ||
182 | extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
183 | _mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C) | |
184 | { | |
89509419 | 185 | return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C); |
cbf2e4d4 HJ |
186 | } |
187 | ||
188 | extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
189 | _mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C) | |
190 | { | |
89509419 | 191 | return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
cbf2e4d4 HJ |
192 | } |
193 | ||
194 | extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
195 | _mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C) | |
196 | { | |
89509419 | 197 | return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C); |
cbf2e4d4 HJ |
198 | } |
199 | ||
200 | extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
201 | _mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C) | |
202 | { | |
89509419 | 203 | return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
cbf2e4d4 HJ |
204 | } |
205 | ||
206 | extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
207 | _mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C) | |
208 | { | |
89509419 | 209 | return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C); |
cbf2e4d4 HJ |
210 | } |
211 | ||
212 | extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
213 | _mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C) | |
214 | { | |
215 | return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); | |
216 | } | |
217 | ||
218 | extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
219 | _mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C) | |
220 | { | |
221 | return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C); | |
222 | } | |
223 | ||
224 | extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
225 | _mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C) | |
226 | { | |
89509419 | 227 | return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
cbf2e4d4 HJ |
228 | } |
229 | ||
230 | extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
231 | _mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C) | |
232 | { | |
89509419 | 233 | return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C); |
cbf2e4d4 HJ |
234 | } |
235 | ||
97db2bf7 ST |
/* If __DISABLE_FMA4__ is defined, i.e. this header pushed options and
   forced the "fma4" target earlier, drop the marker and pop the saved
   options again.  */
#if defined (__DISABLE_FMA4__)
#undef __DISABLE_FMA4__
#pragma GCC pop_options
#endif /* __DISABLE_FMA4__ */
cbf2e4d4 HJ |
240 | |
241 | #endif |