/* Copyright (C) 2008-2024 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 11.0.  */

#ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVXINTRIN_H_INCLUDED
#define _AVXINTRIN_H_INCLUDED

#ifndef __AVX__
#pragma GCC push_options
#pragma GCC target("avx")
#define __DISABLE_AVX__
#endif /* __AVX__ */

/* Internal data types for implementing the intrinsics.  */
typedef double __v4df __attribute__ ((__vector_size__ (32)));
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
typedef long long __v4di __attribute__ ((__vector_size__ (32)));
typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));
typedef int __v8si __attribute__ ((__vector_size__ (32)));
typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
typedef char __v32qi __attribute__ ((__vector_size__ (32)));
typedef signed char __v32qs __attribute__ ((__vector_size__ (32)));
typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m256 __attribute__ ((__vector_size__ (32),
                                     __may_alias__));
typedef long long __m256i __attribute__ ((__vector_size__ (32),
                                          __may_alias__));
typedef double __m256d __attribute__ ((__vector_size__ (32),
                                       __may_alias__));

/* Unaligned version of the same types.  */
typedef float __m256_u __attribute__ ((__vector_size__ (32),
                                       __may_alias__,
                                       __aligned__ (1)));
typedef long long __m256i_u __attribute__ ((__vector_size__ (32),
                                            __may_alias__,
                                            __aligned__ (1)));
typedef double __m256d_u __attribute__ ((__vector_size__ (32),
                                         __may_alias__,
                                         __aligned__ (1)));

/* Compare predicates for scalar and packed compare intrinsics.  */

/* Equal (ordered, non-signaling)  */
#define _CMP_EQ_OQ 0x00
/* Less-than (ordered, signaling)  */
#define _CMP_LT_OS 0x01
/* Less-than-or-equal (ordered, signaling)  */
#define _CMP_LE_OS 0x02
/* Unordered (non-signaling)  */
#define _CMP_UNORD_Q 0x03
/* Not-equal (unordered, non-signaling)  */
#define _CMP_NEQ_UQ 0x04
/* Not-less-than (unordered, signaling)  */
#define _CMP_NLT_US 0x05
/* Not-less-than-or-equal (unordered, signaling)  */
#define _CMP_NLE_US 0x06
/* Ordered (non-signaling)  */
#define _CMP_ORD_Q 0x07
/* Equal (unordered, non-signaling)  */
#define _CMP_EQ_UQ 0x08
/* Not-greater-than-or-equal (unordered, signaling)  */
#define _CMP_NGE_US 0x09
/* Not-greater-than (unordered, signaling)  */
#define _CMP_NGT_US 0x0a
/* False (ordered, non-signaling)  */
#define _CMP_FALSE_OQ 0x0b
/* Not-equal (ordered, non-signaling)  */
#define _CMP_NEQ_OQ 0x0c
/* Greater-than-or-equal (ordered, signaling)  */
#define _CMP_GE_OS 0x0d
/* Greater-than (ordered, signaling)  */
#define _CMP_GT_OS 0x0e
/* True (unordered, non-signaling)  */
#define _CMP_TRUE_UQ 0x0f
/* Equal (ordered, signaling)  */
#define _CMP_EQ_OS 0x10
/* Less-than (ordered, non-signaling)  */
#define _CMP_LT_OQ 0x11
/* Less-than-or-equal (ordered, non-signaling)  */
#define _CMP_LE_OQ 0x12
/* Unordered (signaling)  */
#define _CMP_UNORD_S 0x13
/* Not-equal (unordered, signaling)  */
#define _CMP_NEQ_US 0x14
/* Not-less-than (unordered, non-signaling)  */
#define _CMP_NLT_UQ 0x15
/* Not-less-than-or-equal (unordered, non-signaling)  */
#define _CMP_NLE_UQ 0x16
/* Ordered (signaling)  */
#define _CMP_ORD_S 0x17
/* Equal (unordered, signaling)  */
#define _CMP_EQ_US 0x18
/* Not-greater-than-or-equal (unordered, non-signaling)  */
#define _CMP_NGE_UQ 0x19
/* Not-greater-than (unordered, non-signaling)  */
#define _CMP_NGT_UQ 0x1a
/* False (ordered, signaling)  */
#define _CMP_FALSE_OS 0x1b
/* Not-equal (ordered, signaling)  */
#define _CMP_NEQ_OS 0x1c
/* Greater-than-or-equal (ordered, non-signaling)  */
#define _CMP_GE_OQ 0x1d
/* Greater-than (ordered, non-signaling)  */
#define _CMP_GT_OQ 0x1e
/* True (unordered, signaling)  */
#define _CMP_TRUE_US 0x1f

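/* Editorial usage sketch (not part of the original header): the
   predicate macros above are passed as the immediate argument of the
   _mm_cmp_* and _mm256_cmp_* intrinsics defined further down.  With an
   ordered, non-signaling predicate such as _CMP_LT_OQ, a comparison
   involving a NaN yields false (an all-zero lane):

     __m256d __a = _mm256_set1_pd (1.0);
     __m256d __b = _mm256_set1_pd (2.0);
     __m256d __m = _mm256_cmp_pd (__a, __b, _CMP_LT_OQ);
     int __bits = _mm256_movemask_pd (__m);   // 0xf: all four lanes true

   The unordered variants (e.g. _CMP_NEQ_UQ) instead yield true when
   either operand is NaN; the signaling variants additionally raise the
   invalid exception for quiet NaN operands.  */
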
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_pd (__m256d __A, __m256d __B)
{
  return (__m256d) ((__v4df)__A + (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_ps (__m256 __A, __m256 __B)
{
  return (__m256) ((__v8sf)__A + (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_addsub_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_addsub_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_addsubps256 ((__v8sf)__A, (__v8sf)__B);
}


extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andnpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andnps256 ((__v8sf)__A, (__v8sf)__B);
}

/* Double/single precision floating point blend instructions - select
   data from 2 sources using constant/variable mask.  */

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_pd (__m256d __X, __m256d __Y, const int __M)
{
  return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X,
                                              (__v4df)__Y,
                                              __M);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_ps (__m256 __X, __m256 __Y, const int __M)
{
  return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X,
                                             (__v8sf)__Y,
                                             __M);
}
#else
#define _mm256_blend_pd(X, Y, M) \
  ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X), \
                                        (__v4df)(__m256d)(Y), (int)(M)))

#define _mm256_blend_ps(X, Y, M) \
  ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X), \
                                       (__v8sf)(__m256)(Y), (int)(M)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
{
  return (__m256d) __builtin_ia32_blendvpd256 ((__v4df)__X,
                                               (__v4df)__Y,
                                               (__v4df)__M);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
{
  return (__m256) __builtin_ia32_blendvps256 ((__v8sf)__X,
                                              (__v8sf)__Y,
                                              (__v8sf)__M);
}

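/* Editorial sketch of the two mask conventions (an addition, assuming
   the documented AVX semantics): the immediate forms select element I
   from the second source when bit I of the mask is set, while the
   variable forms select on the sign bit of each mask element, so a
   compare result can drive the blend directly:

     __m256d __x = _mm256_set_pd (3.0, 2.0, 1.0, 0.0);
     __m256d __y = _mm256_set_pd (7.0, 6.0, 5.0, 4.0);
     __m256d __r = _mm256_blend_pd (__x, __y, 0x5);
     // __r = { 4.0, 1.0, 6.0, 3.0 } in element order 0..3
     __m256d __m = _mm256_cmp_pd (__x, __y, _CMP_LT_OQ);
     __m256d __mn = _mm256_blendv_pd (__y, __x, __m);  // elementwise min  */
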
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_pd (__m256d __A, __m256d __B)
{
  return (__m256d) ((__v4df)__A / (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_ps (__m256 __A, __m256 __B)
{
  return (__m256) ((__v8sf)__A / (__v8sf)__B);
}

/* Dot product instructions with mask-defined summing and zeroing parts
   of result.  */

#ifdef __OPTIMIZE__
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dp_ps (__m256 __X, __m256 __Y, const int __M)
{
  return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X,
                                          (__v8sf)__Y,
                                          __M);
}
#else
#define _mm256_dp_ps(X, Y, M) \
  ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X), \
                                    (__v8sf)(__m256)(Y), (int)(M)))
#endif

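/* Editorial sketch (assuming the documented VDPPS semantics): within
   each 128-bit lane, the high four mask bits choose which products are
   summed and the low four bits choose which result elements receive
   the sum (the rest are zeroed).  The two lanes are independent:

     __m256 __x = _mm256_set1_ps (1.0f);
     __m256 __y = _mm256_set1_ps (2.0f);
     __m256 __d = _mm256_dp_ps (__x, __y, 0xff);
     // every element of each 128-bit lane = (1*2) * 4 = 8.0f  */
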
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_pd (__m256d __X, __m256d __Y)
{
  return (__m256d) __builtin_ia32_haddpd256 ((__v4df)__X, (__v4df)__Y);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_ps (__m256 __X, __m256 __Y)
{
  return (__m256) __builtin_ia32_haddps256 ((__v8sf)__X, (__v8sf)__Y);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_pd (__m256d __X, __m256d __Y)
{
  return (__m256d) __builtin_ia32_hsubpd256 ((__v4df)__X, (__v4df)__Y);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_ps (__m256 __X, __m256 __Y)
{
  return (__m256) __builtin_ia32_hsubps256 ((__v8sf)__X, (__v8sf)__Y);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_maxpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_maxps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_minpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_minps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_pd (__m256d __A, __m256d __B)
{
  return (__m256d) ((__v4df)__A * (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_ps (__m256 __A, __m256 __B)
{
  return (__m256) ((__v8sf)__A * (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256 ((__v8sf)__A, (__v8sf)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_pd (__m256d __A, __m256d __B, const int __mask)
{
  return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B,
                                             __mask);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask)
{
  return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B,
                                            __mask);
}
#else
#define _mm256_shuffle_pd(A, B, N) \
  ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A), \
                                      (__v4df)(__m256d)(B), (int)(N)))

#define _mm256_shuffle_ps(A, B, N) \
  ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A), \
                                      (__v8sf)(__m256)(B), (int)(N)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_pd (__m256d __A, __m256d __B)
{
  return (__m256d) ((__v4df)__A - (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_ps (__m256 __A, __m256 __B)
{
  return (__m256) ((__v8sf)__A - (__v8sf)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256 ((__v8sf)__A, (__v8sf)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
{
  return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
{
  return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
{
  return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y,
                                            __P);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
{
  return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
                                           __P);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
{
  return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
{
  return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
}
#else
#define _mm_cmp_pd(X, Y, P) \
  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
                                   (__v2df)(__m128d)(Y), (int)(P)))

#define _mm_cmp_ps(X, Y, P) \
  ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
                                  (__v4sf)(__m128)(Y), (int)(P)))

#define _mm256_cmp_pd(X, Y, P) \
  ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X), \
                                      (__v4df)(__m256d)(Y), (int)(P)))

#define _mm256_cmp_ps(X, Y, P) \
  ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X), \
                                     (__v8sf)(__m256)(Y), (int)(P)))

#define _mm_cmp_sd(X, Y, P) \
  ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
                                   (__v2df)(__m128d)(Y), (int)(P)))

#define _mm_cmp_ss(X, Y, P) \
  ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
                                  (__v4sf)(__m128)(Y), (int)(P)))
#endif

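/* Editorial sketch: a compare result is an all-ones/all-zeros bit mask
   per element, so it can be reduced with the movemask intrinsics.  For
   example, a NaN scan using _CMP_UNORD_Q, which is true exactly when a
   lane is unordered with itself:

     __m256 __v = _mm256_set1_ps (__builtin_nanf (""));
     __m256 __u = _mm256_cmp_ps (__v, __v, _CMP_UNORD_Q);
     if (_mm256_movemask_ps (__u) != 0)
       ;  // at least one lane held a NaN  */
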
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsi256_si32 (__m256i __A)
{
  __v8si __B = (__v8si) __A;
  return __B[0];
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_pd (__m128i __A)
{
  return (__m256d)__builtin_ia32_cvtdq2pd256 ((__v4si) __A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_ps (__m256i __A)
{
  return (__m256)__builtin_ia32_cvtdq2ps256 ((__v8si) __A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_ps (__m256d __A)
{
  return (__m128)__builtin_ia32_cvtpd2ps256 ((__v4df) __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_epi32 (__m256 __A)
{
  return (__m256i)__builtin_ia32_cvtps2dq256 ((__v8sf) __A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_pd (__m128 __A)
{
  return (__m256d)__builtin_ia32_cvtps2pd256 ((__v4sf) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epi32 (__m256d __A)
{
  return (__m128i)__builtin_ia32_cvttpd2dq256 ((__v4df) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_epi32 (__m256d __A)
{
  return (__m128i)__builtin_ia32_cvtpd2dq256 ((__v4df) __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttps_epi32 (__m256 __A)
{
  return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A);
}

extern __inline double
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsd_f64 (__m256d __A)
{
  return __A[0];
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtss_f32 (__m256 __A)
{
  return __A[0];
}

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_pd (__m256d __X, const int __N)
{
  return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_ps (__m256 __X, const int __N)
{
  return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_si256 (__m256i __X, const int __N)
{
  return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi32 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
  return _mm_extract_epi32 (__Y, __N % 4);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi16 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
  return _mm_extract_epi16 (__Y, __N % 8);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi8 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
  return _mm_extract_epi8 (__Y, __N % 16);
}

#ifdef __x86_64__
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi64 (__m256i __X, const int __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
  return _mm_extract_epi64 (__Y, __N % 2);
}
#endif
#else
#define _mm256_extractf128_pd(X, N) \
  ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X), \
                                                (int)(N)))

#define _mm256_extractf128_ps(X, N) \
  ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X), \
                                               (int)(N)))

#define _mm256_extractf128_si256(X, N) \
  ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X), \
                                                (int)(N)))

#define _mm256_extract_epi32(X, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
      _mm_extract_epi32 (__Y, (N) % 4); \
    }))

#define _mm256_extract_epi16(X, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
      _mm_extract_epi16 (__Y, (N) % 8); \
    }))

#define _mm256_extract_epi8(X, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
      _mm_extract_epi8 (__Y, (N) % 16); \
    }))

#ifdef __x86_64__
#define _mm256_extract_epi64(X, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
      _mm_extract_epi64 (__Y, (N) % 2); \
    }))
#endif
#endif

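/* Editorial note: AVX has no single cross-lane element extract, so the
   helpers above split the index into a 128-bit half (__N >> 2 for
   32-bit elements) and a position within it (__N % 4).  A sketch:

     __m256i __v = _mm256_set_epi32 (7, 6, 5, 4, 3, 2, 1, 0);
     int __e = _mm256_extract_epi32 (__v, 5);  // half 1, slot 1 -> 5  */
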
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zeroall (void)
{
  __builtin_ia32_vzeroall ();
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zeroupper (void)
{
  __builtin_ia32_vzeroupper ();
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutevar_pd (__m128d __A, __m128i __C)
{
  return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A,
                                                (__v2di)__C);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar_pd (__m256d __A, __m256i __C)
{
  return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A,
                                                   (__v4di)__C);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutevar_ps (__m128 __A, __m128i __C)
{
  return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A,
                                               (__v4si)__C);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar_ps (__m256 __A, __m256i __C)
{
  return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A,
                                                  (__v8si)__C);
}

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute_pd (__m128d __X, const int __C)
{
  return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute_pd (__m256d __X, const int __C)
{
  return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute_ps (__m128 __X, const int __C)
{
  return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute_ps (__m256 __X, const int __C)
{
  return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
}
#else
#define _mm_permute_pd(X, C) \
  ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))

#define _mm256_permute_pd(X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X), (int)(C)))

#define _mm_permute_ps(X, C) \
  ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))

#define _mm256_permute_ps(X, C) \
  ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_pd (__m256d __X, __m256d __Y, const int __C)
{
  return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X,
                                                    (__v4df)__Y,
                                                    __C);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_ps (__m256 __X, __m256 __Y, const int __C)
{
  return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X,
                                                   (__v8sf)__Y,
                                                   __C);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_si256 (__m256i __X, __m256i __Y, const int __C)
{
  return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X,
                                                    (__v8si)__Y,
                                                    __C);
}
#else
#define _mm256_permute2f128_pd(X, Y, C) \
  ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X), \
                                              (__v4df)(__m256d)(Y), \
                                              (int)(C)))

#define _mm256_permute2f128_ps(X, Y, C) \
  ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X), \
                                             (__v8sf)(__m256)(Y), \
                                             (int)(C)))

#define _mm256_permute2f128_si256(X, Y, C) \
  ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X), \
                                              (__v8si)(__m256i)(Y), \
                                              (int)(C)))
#endif

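/* Editorial sketch of the VPERM2F128 control byte (per the documented
   encoding): bits 1:0 select the source of the result's low 128 bits
   (0/1 = low/high half of X, 2/3 = low/high half of Y), bits 5:4 do
   the same for the high 128 bits, and bits 3 and 7 zero the respective
   half instead.  The common interleave idiom:

     __m256i __lo = _mm256_permute2f128_si256 (__x, __y, 0x20);
     // low half of X | low half of Y
     __m256i __hi = _mm256_permute2f128_si256 (__x, __y, 0x31);
     // high half of X | high half of Y  */
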
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcast_ss (float const *__X)
{
  return (__m128) __builtin_ia32_vbroadcastss (__X);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_sd (double const *__X)
{
  return (__m256d) __builtin_ia32_vbroadcastsd256 (__X);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_ss (float const *__X)
{
  return (__m256) __builtin_ia32_vbroadcastss256 (__X);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_pd (__m128d const *__X)
{
  return (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_ps (__m128 const *__X)
{
  return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
}

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
{
  return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
                                                     (__v2df)__Y,
                                                     __O);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
{
  return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
                                                    (__v4sf)__Y,
                                                    __O);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
{
  return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
                                                     (__v4si)__Y,
                                                     __O);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi32 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
  __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 2);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi16 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
  __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 3);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi8 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
  __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 4);
}

#ifdef __x86_64__
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi64 (__m256i __X, long long __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
  __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 1);
}
#endif
#else
#define _mm256_insertf128_pd(X, Y, O) \
  ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X), \
                                               (__v2df)(__m128d)(Y), \
                                               (int)(O)))

#define _mm256_insertf128_ps(X, Y, O) \
  ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X), \
                                              (__v4sf)(__m128)(Y), \
                                              (int)(O)))

#define _mm256_insertf128_si256(X, Y, O) \
  ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X), \
                                               (__v4si)(__m128i)(Y), \
                                               (int)(O)))

#define _mm256_insert_epi32(X, D, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
      __Y = _mm_insert_epi32 (__Y, (D), (N) % 4); \
      _mm256_insertf128_si256 ((X), __Y, (N) >> 2); \
    }))

#define _mm256_insert_epi16(X, D, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
      __Y = _mm_insert_epi16 (__Y, (D), (N) % 8); \
      _mm256_insertf128_si256 ((X), __Y, (N) >> 3); \
    }))

#define _mm256_insert_epi8(X, D, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
      __Y = _mm_insert_epi8 (__Y, (D), (N) % 16); \
      _mm256_insertf128_si256 ((X), __Y, (N) >> 4); \
    }))

#ifdef __x86_64__
#define _mm256_insert_epi64(X, D, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
      __Y = _mm_insert_epi64 (__Y, (D), (N) % 2); \
      _mm256_insertf128_si256 ((X), __Y, (N) >> 1); \
    }))
#endif
#endif

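/* Editorial sketch, mirroring the extract helpers above: the element
   inserters pull out the affected 128-bit half, patch it, and put it
   back:

     __m256i __v = _mm256_setzero_si256 ();
     __v = _mm256_insert_epi16 (__v, 0x7fff, 9);  // half 9>>3, slot 9%8  */
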
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_pd (double const *__P)
{
  return *(__m256d *)__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_pd (double *__P, __m256d __A)
{
  *(__m256d *)__P = __A;
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ps (float const *__P)
{
  return *(__m256 *)__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ps (float *__P, __m256 __A)
{
  *(__m256 *)__P = __A;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_pd (double const *__P)
{
  return *(__m256d_u *)__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_pd (double *__P, __m256d __A)
{
  *(__m256d_u *)__P = __A;
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ps (float const *__P)
{
  return *(__m256_u *)__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ps (float *__P, __m256 __A)
{
  *(__m256_u *)__P = __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_si256 (__m256i const *__P)
{
  return *__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_si256 (__m256i *__P, __m256i __A)
{
  *__P = __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_si256 (__m256i_u const *__P)
{
  return *__P;
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_si256 (__m256i_u *__P, __m256i __A)
{
  *__P = __A;
}

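/* Editorial note: the plain load/store forms above require 32-byte
   aligned addresses (the aligned instructions fault on misaligned
   operands), while the loadu/storeu forms accept any address.  A
   sketch:

     double __a[4] __attribute__ ((__aligned__ (32)));
     _mm256_store_pd (__a, _mm256_set1_pd (1.0));      // alignment guaranteed
     double __b[8];
     _mm256_storeu_pd (__b + 1, _mm256_set1_pd (2.0)); // may be misaligned  */
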
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_pd (double const *__P, __m128i __M)
{
  return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
                                              (__v2di)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A)
{
  __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_pd (double const *__P, __m256i __M)
{
  return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
                                                 (__v4di)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A)
{
  __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_ps (float const *__P, __m128i __M)
{
  return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
                                             (__v4si)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A)
{
  __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_ps (float const *__P, __m256i __M)
{
  return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
                                                (__v8si)__M);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A)
{
  __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A);
}

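/* Editorial sketch: the mask enables a lane when the most significant
   bit of its element is set; disabled lanes read as zero and are left
   untouched on store:

     double __src[4] = { 1.0, 2.0, 3.0, 4.0 };
     __m256i __m = _mm256_set_epi64x (0, 0, -1, -1);  // enable lanes 0 and 1
     __m256d __v = _mm256_maskload_pd (__src, __m);   // { 1.0, 2.0, 0.0, 0.0 }  */
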
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movehdup_ps (__m256 __X)
{
  return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_moveldup_ps (__m256 __X)
{
  return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movedup_pd (__m256d __X)
{
  return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_lddqu_si256 (__m256i const *__P)
{
  return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_si256 (__m256i *__A, __m256i __B)
{
  __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_pd (double *__A, __m256d __B)
{
  __builtin_ia32_movntpd256 (__A, (__v4df)__B);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_ps (float *__P, __m256 __A)
{
  __builtin_ia32_movntps256 (__P, (__v8sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_pd (__m256d __A)
{
  return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_ps (__m256 __A)
{
  return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A);
}

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_pd (__m256d __V, const int __M)
{
  return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_ps (__m256 __V, const int __M)
{
  return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
}
#else
#define _mm256_round_pd(V, M) \
  ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))

#define _mm256_round_ps(V, M) \
  ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
#endif

#define _mm256_ceil_pd(V)  _mm256_round_pd ((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V) _mm256_round_pd ((V), _MM_FROUND_FLOOR)
#define _mm256_ceil_ps(V)  _mm256_round_ps ((V), _MM_FROUND_CEIL)
#define _mm256_floor_ps(V) _mm256_round_ps ((V), _MM_FROUND_FLOOR)

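/* Editorial sketch (assuming the _MM_FROUND_* macros from
   <smmintrin.h>, which <immintrin.h> provides): the rounding-mode
   immediate combines a direction with an exception-suppression flag:

     __m256d __v = _mm256_set1_pd (2.5);
     __m256d __n = _mm256_round_pd (__v, _MM_FROUND_TO_NEAREST_INT
                                         | _MM_FROUND_NO_EXC);
     // 2.0 in every lane (round-to-nearest-even)  */
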
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_pd (__m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_ps (__m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_pd (__m128d __M, __m128d __V)
{
  return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_pd (__m128d __M, __m128d __V)
{
  return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_pd (__m128d __M, __m128d __V)
{
  return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_ps (__m128 __M, __m128 __V)
{
  return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_ps (__m128 __M, __m128 __V)
{
  return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_ps (__m128 __M, __m128 __V)
{
  return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_pd (__m256d __M, __m256d __V)
{
  return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_pd (__m256d __M, __m256d __V)
{
  return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_pd (__m256d __M, __m256d __V)
{
  return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_ps (__m256 __M, __m256 __V)
{
  return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_ps (__m256 __M, __m256 __V)
{
  return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_ps (__m256 __M, __m256 __V)
{
  return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_si256 (__m256i __M, __m256i __V)
{
  return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_si256 (__m256i __M, __m256i __V)
{
  return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_si256 (__m256i __M, __m256i __V)
{
  return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_pd (__m256d __A)
{
  return __builtin_ia32_movmskpd256 ((__v4df)__A);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_ps (__m256 __A)
{
  return __builtin_ia32_movmskps256 ((__v8sf)__A);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_undefined_pd (void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winit-self"
  __m256d __Y = __Y;
#pragma GCC diagnostic pop
  return __Y;
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_undefined_ps (void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winit-self"
  __m256 __Y = __Y;
#pragma GCC diagnostic pop
  return __Y;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_undefined_si256 (void)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winit-self"
  __m256i __Y = __Y;
#pragma GCC diagnostic pop
  return __Y;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_pd (void)
{
  return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_ps (void)
{
  return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
                                 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_si256 (void)
{
  return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
}

/* Create the vector [A B C D].  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m256d){ __D, __C, __B, __A };
}

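/* Editorial note: the first argument is the highest element, so values
   land in memory in reverse argument order; _mm256_setr_pd below takes
   them in memory order instead:

     double __out[4];
     _mm256_storeu_pd (__out, _mm256_set_pd (3.0, 2.0, 1.0, 0.0));
     // __out == { 0.0, 1.0, 2.0, 3.0 }  */
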
/* Create the vector [A B C D E F G H].  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_ps (float __A, float __B, float __C, float __D,
               float __E, float __F, float __G, float __H)
{
  return __extension__ (__m256){ __H, __G, __F, __E,
                                 __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H].  */
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi32 (int __A, int __B, int __C, int __D,
                  int __E, int __F, int __G, int __H)
{
  return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
                                          __D, __C, __B, __A };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi16 (short __q15, short __q14, short __q13, short __q12,
                  short __q11, short __q10, short __q09, short __q08,
                  short __q07, short __q06, short __q05, short __q04,
                  short __q03, short __q02, short __q01, short __q00)
{
  return __extension__ (__m256i)(__v16hi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
  };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi8 (char __q31, char __q30, char __q29, char __q28,
                 char __q27, char __q26, char __q25, char __q24,
                 char __q23, char __q22, char __q21, char __q20,
                 char __q19, char __q18, char __q17, char __q16,
                 char __q15, char __q14, char __q13, char __q12,
                 char __q11, char __q10, char __q09, char __q08,
                 char __q07, char __q06, char __q05, char __q04,
                 char __q03, char __q02, char __q01, char __q00)
{
  return __extension__ (__m256i)(__v32qi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
    __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
    __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
  };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi64x (long long __A, long long __B, long long __C,
                   long long __D)
{
  return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A };
}

/* Create a vector with all elements equal to A.  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_pd (double __A)
{
  return __extension__ (__m256d){ __A, __A, __A, __A };
}

/* Create a vector with all elements equal to A.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ps (float __A)
{
  return __extension__ (__m256){ __A, __A, __A, __A,
                                 __A, __A, __A, __A };
}

/* Create a vector with all elements equal to A.  */
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi32 (int __A)
{
  return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
                                          __A, __A, __A, __A };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi16 (short __A)
{
  return _mm256_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
                           __A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi8 (char __A)
{
  return _mm256_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
                          __A, __A, __A, __A, __A, __A, __A, __A,
                          __A, __A, __A, __A, __A, __A, __A, __A,
                          __A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi64x (long long __A)
{
  return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A };
}

/* Create vectors of elements in the reversed order from the
   _mm256_set_XXX functions.  */

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_pd (double __A, double __B, double __C, double __D)
{
  return _mm256_set_pd (__D, __C, __B, __A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_ps (float __A, float __B, float __C, float __D,
                float __E, float __F, float __G, float __H)
{
  return _mm256_set_ps (__H, __G, __F, __E, __D, __C, __B, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi32 (int __A, int __B, int __C, int __D,
                   int __E, int __F, int __G, int __H)
{
  return _mm256_set_epi32 (__H, __G, __F, __E, __D, __C, __B, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi16 (short __q15, short __q14, short __q13, short __q12,
                   short __q11, short __q10, short __q09, short __q08,
                   short __q07, short __q06, short __q05, short __q04,
                   short __q03, short __q02, short __q01, short __q00)
{
  return _mm256_set_epi16 (__q00, __q01, __q02, __q03,
                           __q04, __q05, __q06, __q07,
                           __q08, __q09, __q10, __q11,
                           __q12, __q13, __q14, __q15);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi8 (char __q31, char __q30, char __q29, char __q28,
                  char __q27, char __q26, char __q25, char __q24,
                  char __q23, char __q22, char __q21, char __q20,
                  char __q19, char __q18, char __q17, char __q16,
                  char __q15, char __q14, char __q13, char __q12,
                  char __q11, char __q10, char __q09, char __q08,
                  char __q07, char __q06, char __q05, char __q04,
                  char __q03, char __q02, char __q01, char __q00)
{
  return _mm256_set_epi8 (__q00, __q01, __q02, __q03,
                          __q04, __q05, __q06, __q07,
                          __q08, __q09, __q10, __q11,
                          __q12, __q13, __q14, __q15,
                          __q16, __q17, __q18, __q19,
                          __q20, __q21, __q22, __q23,
                          __q24, __q25, __q26, __q27,
                          __q28, __q29, __q30, __q31);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi64x (long long __A, long long __B, long long __C,
                    long long __D)
{
  return _mm256_set_epi64x (__D, __C, __B, __A);
}

/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_ps (__m256d __A)
{
  return (__m256) __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_si256 (__m256d __A)
{
  return (__m256i) __A;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_pd (__m256 __A)
{
  return (__m256d) __A;
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_si256(__m256 __A)
{
  return (__m256i) __A;
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_ps (__m256i __A)
{
  return (__m256) __A;
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_pd (__m256i __A)
{
  return (__m256d) __A;
}

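/* Editorial sketch of the cast-versus-convert distinction noted above:

     __m256 __f = _mm256_set1_ps (1.0f);
     __m256i __bits = _mm256_castps_si256 (__f);  // 0x3f800000 per lane
     __m256i __ints = _mm256_cvtps_epi32 (__f);   // 1 per lane  */
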
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd256_pd128 (__m256d __A)
{
  return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps256_ps128 (__m256 __A)
{
  return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_si128 (__m256i __A)
{
  return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A);
}

/* When cast is done from a 128 to 256-bit type, the low 128 bits of
   the 256-bit result contain the source parameter value and the upper
   128 bits of the result are undefined.  These intrinsics shouldn't
   generate any extra moves.  */

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd128_pd256 (__m128d __A)
{
  return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps128_ps256 (__m128 __A)
{
  return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi128_si256 (__m128i __A)
{
  return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
}

/* Similarly, but with zero extension instead of undefined values.  */

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zextpd128_pd256 (__m128d __A)
{
  return _mm256_insertf128_pd (_mm256_setzero_pd (), __A, 0);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zextps128_ps256 (__m128 __A)
{
  return _mm256_insertf128_ps (_mm256_setzero_ps (), __A, 0);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zextsi128_si256 (__m128i __A)
{
  return _mm256_insertf128_si256 (_mm256_setzero_si256 (), __A, 0);
}

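/* Editorial sketch contrasting the two widening styles:

     __m128d __x = _mm_set1_pd (1.0);
     __m256d __u = _mm256_castpd128_pd256 (__x);  // upper half undefined
     __m256d __z = _mm256_zextpd128_pd256 (__x);  // upper half zeroed  */
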
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_m128 (__m128 __H, __m128 __L)
{
  return _mm256_insertf128_ps (_mm256_castps128_ps256 (__L), __H, 1);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_m128d (__m128d __H, __m128d __L)
{
  return _mm256_insertf128_pd (_mm256_castpd128_pd256 (__L), __H, 1);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_m128i (__m128i __H, __m128i __L)
{
  return _mm256_insertf128_si256 (_mm256_castsi128_si256 (__L), __H, 1);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_m128 (__m128 __L, __m128 __H)
{
  return _mm256_set_m128 (__H, __L);
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_m128d (__m128d __L, __m128d __H)
{
  return _mm256_set_m128d (__H, __L);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_m128i (__m128i __L, __m128i __H)
{
  return _mm256_set_m128i (__H, __L);
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu2_m128 (float const *__PH, float const *__PL)
{
  return _mm256_insertf128_ps (_mm256_castps128_ps256 (_mm_loadu_ps (__PL)),
                               _mm_loadu_ps (__PH), 1);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu2_m128 (float *__PH, float *__PL, __m256 __A)
{
  _mm_storeu_ps (__PL, _mm256_castps256_ps128 (__A));
  _mm_storeu_ps (__PH, _mm256_extractf128_ps (__A, 1));
}

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu2_m128d (double const *__PH, double const *__PL)
{
  return _mm256_insertf128_pd (_mm256_castpd128_pd256 (_mm_loadu_pd (__PL)),
                               _mm_loadu_pd (__PH), 1);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu2_m128d (double *__PH, double *__PL, __m256d __A)
{
  _mm_storeu_pd (__PL, _mm256_castpd256_pd128 (__A));
  _mm_storeu_pd (__PH, _mm256_extractf128_pd (__A, 1));
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu2_m128i (__m128i_u const *__PH, __m128i_u const *__PL)
{
  return _mm256_insertf128_si256 (_mm256_castsi128_si256 (_mm_loadu_si128 (__PL)),
                                  _mm_loadu_si128 (__PH), 1);
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu2_m128i (__m128i_u *__PH, __m128i_u *__PL, __m256i __A)
{
  _mm_storeu_si128 (__PL, _mm256_castsi256_si128 (__A));
  _mm_storeu_si128 (__PH, _mm256_extractf128_si256 (__A, 1));
}

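/* Editorial sketch: the loadu2/storeu2 pairs move the two 128-bit
   halves to and from unrelated, possibly unaligned addresses (note
   that the high-half pointer comes first):

     float __lo[4] = { 0, 1, 2, 3 }, __hi[4] = { 4, 5, 6, 7 };
     __m256 __v = _mm256_loadu2_m128 (__hi, __lo);
     // elements 0..7 = 0,1,2,3,4,5,6,7  */
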
#ifdef __DISABLE_AVX__
#undef __DISABLE_AVX__
#pragma GCC pop_options
#endif /* __DISABLE_AVX__ */

#endif /* _AVXINTRIN_H_INCLUDED */