/* Copyright (C) 2011-2020 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX2INTRIN_H_INCLUDED
#define _AVX2INTRIN_H_INCLUDED

#ifndef __AVX2__
#pragma GCC push_options
#pragma GCC target("avx2")
#define __DISABLE_AVX2__
#endif /* __AVX2__ */

/* Sum absolute 8-bit integer difference of adjacent groups of 4
   byte integers in the first 2 operands.  Starting offsets within
   operands are determined by the 3rd mask operand.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mpsadbw_epu8 (__m256i __X, __m256i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)__X,
                                              (__v32qi)__Y, __M);
}
#else
#define _mm256_mpsadbw_epu8(X, Y, M) \
  ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)(__m256i)(X), \
                                        (__v32qi)(__m256i)(Y), (int)(M)))
#endif

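/* Illustrative usage sketch (not part of the original header; src_a and
   src_b are hypothetical pointers assumed to reference at least 32
   readable bytes each):

     __m256i __a = _mm256_loadu_si256 ((__m256i const *) src_a);
     __m256i __b = _mm256_loadu_si256 ((__m256i const *) src_b);
     __m256i __sums = _mm256_mpsadbw_epu8 (__a, __b, 0);

   With a mask of 0 the 4-byte groups start at offset 0 within each
   128-bit lane of both operands.  */
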
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi8 (__m256i __A)
{
  return (__m256i)__builtin_ia32_pabsb256 ((__v32qi)__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi16 (__m256i __A)
{
  return (__m256i)__builtin_ia32_pabsw256 ((__v16hi)__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi32 (__m256i __A)
{
  return (__m256i)__builtin_ia32_pabsd256 ((__v8si)__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packs_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_packssdw256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packs_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_packsswb256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packus_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_packusdw256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packus_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_packuswb256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v32qu)__A + (__v32qu)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v16hu)__A + (__v16hu)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v8su)__A + (__v8su)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4du)__A + (__v4du)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddsb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddsw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddusb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_paddusw256 ((__v16hi)__A, (__v16hi)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_alignr_epi8 (__m256i __A, __m256i __B, const int __N)
{
  return (__m256i) __builtin_ia32_palignr256 ((__v4di)__A,
                                              (__v4di)__B,
                                              __N * 8);
}
#else
/* Without optimization, (__N * 8) would end up in a vector register and
   the insn pattern, which needs an immediate shift count, would not be
   matched.  Use the macro form instead.  */
#define _mm256_alignr_epi8(A, B, N) \
  ((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A), \
                                        (__v4di)(__m256i)(B), \
                                        (int)(N) * 8))
#endif

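/* Illustrative usage sketch (not part of the original header): the shift
   count is in bytes and is applied within each 128-bit lane, e.g.

     __m256i __r = _mm256_alignr_epi8 (__hi, __lo, 4);

   concatenates the corresponding lanes of __hi and __lo and extracts
   16 bytes starting 4 bytes into each concatenated pair.  */
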
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_si256 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4du)__A & (__v4du)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_si256 (__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_andnotsi256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avg_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pavgb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avg_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pavgw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
{
  return (__m256i) __builtin_ia32_pblendvb256 ((__v32qi)__X,
                                               (__v32qi)__Y,
                                               (__v32qi)__M);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_epi16 (__m256i __X, __m256i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_pblendw256 ((__v16hi)__X,
                                              (__v16hi)__Y,
                                              __M);
}
#else
#define _mm256_blend_epi16(X, Y, M) \
  ((__m256i) __builtin_ia32_pblendw256 ((__v16hi)(__m256i)(X), \
                                        (__v16hi)(__m256i)(Y), (int)(M)))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v32qi)__A == (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v16hi)__A == (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v8si)__A == (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4di)__A == (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v32qs)__A > (__v32qs)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v16hi)__A > (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v8si)__A > (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4di)__A > (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phaddw256 ((__v16hi)__X,
                                             (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phaddd256 ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadds_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phaddsw256 ((__v16hi)__X,
                                              (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phsubw256 ((__v16hi)__X,
                                             (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phsubd256 ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsubs_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_phsubsw256 ((__v16hi)__X,
                                              (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddubs_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_pmaddubsw256 ((__v32qi)__X,
                                                (__v32qi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_madd_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaddwd256 ((__v16hi)__A,
                                             (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxsb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxsd256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxub256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmaxud256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminsb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminsd256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminub256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pminud256 ((__v8si)__A, (__v8si)__B);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_epi8 (__m256i __A)
{
  return __builtin_ia32_pmovmskb256 ((__v32qi)__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi8_epi16 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxbw256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi8_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxbd256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi8_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxbq256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi16_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxwd256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi16_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxwq256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovsxdq256 ((__v4si)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu8_epi16 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxbw256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu8_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxbd256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu8_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxbq256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu16_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxwd256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu16_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxwq256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu32_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pmovzxdq256 ((__v4si)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_pmuldq256 ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mulhrs_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_pmulhrsw256 ((__v16hi)__X,
                                               (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mulhi_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmulhuw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mulhi_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmulhw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mullo_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v16hu)__A * (__v16hu)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mullo_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v8su)__A * (__v8su)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_epu32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_pmuludq256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_si256 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4du)__A | (__v4du)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sad_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psadbw256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_epi8 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_pshufb256 ((__v32qi)__X,
                                             (__v32qi)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_epi32 (__m256i __A, const int __mask)
{
  return (__m256i)__builtin_ia32_pshufd256 ((__v8si)__A, __mask);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shufflehi_epi16 (__m256i __A, const int __mask)
{
  return (__m256i)__builtin_ia32_pshufhw256 ((__v16hi)__A, __mask);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shufflelo_epi16 (__m256i __A, const int __mask)
{
  return (__m256i)__builtin_ia32_pshuflw256 ((__v16hi)__A, __mask);
}
#else
#define _mm256_shuffle_epi32(A, N) \
  ((__m256i)__builtin_ia32_pshufd256 ((__v8si)(__m256i)(A), (int)(N)))
#define _mm256_shufflehi_epi16(A, N) \
  ((__m256i)__builtin_ia32_pshufhw256 ((__v16hi)(__m256i)(A), (int)(N)))
#define _mm256_shufflelo_epi16(A, N) \
  ((__m256i)__builtin_ia32_pshuflw256 ((__v16hi)(__m256i)(A), (int)(N)))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sign_epi8 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psignb256 ((__v32qi)__X, (__v32qi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sign_epi16 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psignw256 ((__v16hi)__X, (__v16hi)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sign_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psignd256 ((__v8si)__X, (__v8si)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_bslli_epi128 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_si256 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
}
#else
#define _mm256_bslli_epi128(A, N) \
  ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
#define _mm256_slli_si256(A, N) \
  ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_epi16 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psllwi256 ((__v16hi)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sll_epi16 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psllw256((__v16hi)__A, (__v8hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_epi32 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_pslldi256 ((__v8si)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sll_epi32 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_pslld256((__v8si)__A, (__v4si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_epi64 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psllqi256 ((__v4di)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sll_epi64 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psllq256((__v4di)__A, (__v2di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srai_epi16 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psrawi256 ((__v16hi)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sra_epi16 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psraw256 ((__v16hi)__A, (__v8hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srai_epi32 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psradi256 ((__v8si)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sra_epi32 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psrad256 ((__v8si)__A, (__v4si)__B);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_bsrli_epi128 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_si256 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
}
#else
#define _mm256_bsrli_epi128(A, N) \
  ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
#define _mm256_srli_si256(A, N) \
  ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_epi16 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psrlwi256 ((__v16hi)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srl_epi16 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psrlw256((__v16hi)__A, (__v8hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_epi32 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psrldi256 ((__v8si)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srl_epi32 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psrld256((__v8si)__A, (__v4si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_epi64 (__m256i __A, int __B)
{
  return (__m256i)__builtin_ia32_psrlqi256 ((__v4di)__A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srl_epi64 (__m256i __A, __m128i __B)
{
  return (__m256i)__builtin_ia32_psrlq256((__v4di)__A, (__v2di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v32qu)__A - (__v32qu)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v16hu)__A - (__v16hu)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v8su)__A - (__v8su)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4du)__A - (__v4du)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubsb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubsw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epu8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubusb256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epu16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_psubusw256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckhbw256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckhwd256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckhdq256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckhqdq256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpcklbw256 ((__v32qi)__A, (__v32qi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi16 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpcklwd256 ((__v16hi)__A, (__v16hi)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi32 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpckldq256 ((__v8si)__A, (__v8si)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_punpcklqdq256 ((__v4di)__A, (__v4di)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_si256 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4du)__A ^ (__v4du)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_load_si256 (__m256i const *__X)
{
  return (__m256i) __builtin_ia32_movntdqa256 ((__v4di *) __X);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastss_ps (__m128 __X)
{
  return (__m128) __builtin_ia32_vbroadcastss_ps ((__v4sf)__X);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastss_ps (__m128 __X)
{
  return (__m256) __builtin_ia32_vbroadcastss_ps256 ((__v4sf)__X);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastsd_pd (__m128d __X)
{
  return (__m256d) __builtin_ia32_vbroadcastsd_pd256 ((__v2df)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastsi128_si256 (__m128i __X)
{
  return (__m256i) __builtin_ia32_vbroadcastsi256 ((__v2di)__X);
}

#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi32 (__m128i __X, __m128i __Y, const int __M)
{
  return (__m128i) __builtin_ia32_pblendd128 ((__v4si)__X,
                                              (__v4si)__Y,
                                              __M);
}
#else
#define _mm_blend_epi32(X, Y, M) \
  ((__m128i) __builtin_ia32_pblendd128 ((__v4si)(__m128i)(X), \
                                        (__v4si)(__m128i)(Y), (int)(M)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_epi32 (__m256i __X, __m256i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_pblendd256 ((__v8si)__X,
                                              (__v8si)__Y,
                                              __M);
}
#else
#define _mm256_blend_epi32(X, Y, M) \
  ((__m256i) __builtin_ia32_pblendd256 ((__v8si)(__m256i)(X), \
                                        (__v8si)(__m256i)(Y), (int)(M)))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastb_epi8 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pbroadcastb256 ((__v16qi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastw_epi16 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pbroadcastw256 ((__v8hi)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastd_epi32 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pbroadcastd256 ((__v4si)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastq_epi64 (__m128i __X)
{
  return (__m256i) __builtin_ia32_pbroadcastq256 ((__v2di)__X);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastb_epi8 (__m128i __X)
{
  return (__m128i) __builtin_ia32_pbroadcastb128 ((__v16qi)__X);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastw_epi16 (__m128i __X)
{
  return (__m128i) __builtin_ia32_pbroadcastw128 ((__v8hi)__X);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastd_epi32 (__m128i __X)
{
  return (__m128i) __builtin_ia32_pbroadcastd128 ((__v4si)__X);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastq_epi64 (__m128i __X)
{
  return (__m128i) __builtin_ia32_pbroadcastq128 ((__v2di)__X);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_permvarsi256 ((__v8si)__X, (__v8si)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute4x64_pd (__m256d __X, const int __M)
{
  return (__m256d) __builtin_ia32_permdf256 ((__v4df)__X, __M);
}
#else
#define _mm256_permute4x64_pd(X, M) \
  ((__m256d) __builtin_ia32_permdf256 ((__v4df)(__m256d)(X), (int)(M)))
#endif

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
{
  return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X, (__v8si)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute4x64_epi64 (__m256i __X, const int __M)
{
  return (__m256i) __builtin_ia32_permdi256 ((__v4di)__X, __M);
}
#else
#define _mm256_permute4x64_epi64(X, M) \
  ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X), (int)(M)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2x128_si256 (__m256i __X, __m256i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_permti256 ((__v4di)__X, (__v4di)__Y, __M);
}
#else
#define _mm256_permute2x128_si256(X, Y, M) \
  ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(M)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti128_si256 (__m256i __X, const int __M)
{
  return (__m128i) __builtin_ia32_extract128i256 ((__v4di)__X, __M);
}
#else
#define _mm256_extracti128_si256(X, M) \
  ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X), (int)(M)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_inserti128_si256 (__m256i __X, __m128i __Y, const int __M)
{
  return (__m256i) __builtin_ia32_insert128i256 ((__v4di)__X, (__v2di)__Y, __M);
}
#else
#define _mm256_inserti128_si256(X, Y, M) \
  ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X), \
                                           (__v2di)(__m128i)(Y), \
                                           (int)(M)))
#endif

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_epi32 (int const *__X, __m256i __M )
{
  return (__m256i) __builtin_ia32_maskloadd256 ((const __v8si *)__X,
                                                (__v8si)__M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_epi64 (long long const *__X, __m256i __M )
{
  return (__m256i) __builtin_ia32_maskloadq256 ((const __v4di *)__X,
                                                (__v4di)__M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_epi32 (int const *__X, __m128i __M )
{
  return (__m128i) __builtin_ia32_maskloadd ((const __v4si *)__X,
                                             (__v4si)__M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_epi64 (long long const *__X, __m128i __M )
{
  return (__m128i) __builtin_ia32_maskloadq ((const __v2di *)__X,
                                             (__v2di)__M);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_epi32 (int *__X, __m256i __M, __m256i __Y )
{
  __builtin_ia32_maskstored256 ((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_epi64 (long long *__X, __m256i __M, __m256i __Y )
{
  __builtin_ia32_maskstoreq256 ((__v4di *)__X, (__v4di)__M, (__v4di)__Y);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_epi32 (int *__X, __m128i __M, __m128i __Y )
{
  __builtin_ia32_maskstored ((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_epi64 (long long *__X, __m128i __M, __m128i __Y )
{
  __builtin_ia32_maskstoreq (( __v2di *)__X, (__v2di)__M, (__v2di)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sllv_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psllv8si ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sllv_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psllv4si ((__v4si)__X, (__v4si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sllv_epi64 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psllv4di ((__v4di)__X, (__v4di)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sllv_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psllv2di ((__v2di)__X, (__v2di)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srav_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psrav8si ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srav_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psrav4si ((__v4si)__X, (__v4si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srlv_epi32 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psrlv8si ((__v8si)__X, (__v8si)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srlv_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psrlv4si ((__v4si)__X, (__v4si)__Y);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srlv_epi64 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_psrlv4di ((__v4di)__X, (__v4di)__Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srlv_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_psrlv2di ((__v2di)__X, (__v2di)__Y);
}

#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
{
  __v2df __zero = _mm_setzero_pd ();
  __v2df __mask = _mm_cmpeq_pd (__zero, __zero);

  return (__m128d) __builtin_ia32_gathersiv2df (_mm_undefined_pd (),
                                                __base,
                                                (__v4si)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_pd (__m128d __src, double const *__base, __m128i __index,
                       __m128d __mask, const int __scale)
{
  return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)__src,
                                                __base,
                                                (__v4si)__index,
                                                (__v2df)__mask,
                                                __scale);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_pd (double const *__base, __m128i __index, const int __scale)
{
  __v4df __zero = _mm256_setzero_pd ();
  __v4df __mask = _mm256_cmp_pd (__zero, __zero, _CMP_EQ_OQ);

  return (__m256d) __builtin_ia32_gathersiv4df (_mm256_undefined_pd (),
                                                __base,
                                                (__v4si)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_pd (__m256d __src, double const *__base,
                          __m128i __index, __m256d __mask, const int __scale)
{
  return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)__src,
                                                __base,
                                                (__v4si)__index,
                                                (__v4df)__mask,
                                                __scale);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
{
  __v2df __src = _mm_setzero_pd ();
  __v2df __mask = _mm_cmpeq_pd (__src, __src);

  return (__m128d) __builtin_ia32_gatherdiv2df (__src,
                                                __base,
                                                (__v2di)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_pd (__m128d __src, double const *__base, __m128i __index,
                       __m128d __mask, const int __scale)
{
  return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)__src,
                                                __base,
                                                (__v2di)__index,
                                                (__v2df)__mask,
                                                __scale);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_pd (double const *__base, __m256i __index, const int __scale)
{
  __v4df __src = _mm256_setzero_pd ();
  __v4df __mask = _mm256_cmp_pd (__src, __src, _CMP_EQ_OQ);

  return (__m256d) __builtin_ia32_gatherdiv4df (__src,
                                                __base,
                                                (__v4di)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_pd (__m256d __src, double const *__base,
                          __m256i __index, __m256d __mask, const int __scale)
{
  return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)__src,
                                                __base,
                                                (__v4di)__index,
                                                (__v4df)__mask,
                                                __scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_ps (float const *__base, __m128i __index, const int __scale)
{
  __v4sf __src = _mm_setzero_ps ();
  __v4sf __mask = _mm_cmpeq_ps (__src, __src);

  return (__m128) __builtin_ia32_gathersiv4sf (__src,
                                               __base,
                                               (__v4si)__index,
                                               __mask,
                                               __scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_ps (__m128 __src, float const *__base, __m128i __index,
                       __m128 __mask, const int __scale)
{
  return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)__src,
                                               __base,
                                               (__v4si)__index,
                                               (__v4sf)__mask,
                                               __scale);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_ps (float const *__base, __m256i __index, const int __scale)
{
  __v8sf __src = _mm256_setzero_ps ();
  __v8sf __mask = _mm256_cmp_ps (__src, __src, _CMP_EQ_OQ);

  return (__m256) __builtin_ia32_gathersiv8sf (__src,
                                               __base,
                                               (__v8si)__index,
                                               __mask,
                                               __scale);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_ps (__m256 __src, float const *__base,
                          __m256i __index, __m256 __mask, const int __scale)
{
  return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)__src,
                                               __base,
                                               (__v8si)__index,
                                               (__v8sf)__mask,
                                               __scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_ps (float const *__base, __m128i __index, const int __scale)
{
  __v4sf __src = _mm_setzero_ps ();
  __v4sf __mask = _mm_cmpeq_ps (__src, __src);

  return (__m128) __builtin_ia32_gatherdiv4sf (__src,
                                               __base,
                                               (__v2di)__index,
                                               __mask,
                                               __scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_ps (__m128 __src, float const *__base, __m128i __index,
                       __m128 __mask, const int __scale)
{
  return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)__src,
                                               __base,
                                               (__v2di)__index,
                                               (__v4sf)__mask,
                                               __scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_ps (float const *__base, __m256i __index, const int __scale)
{
  __v4sf __src = _mm_setzero_ps ();
  __v4sf __mask = _mm_cmpeq_ps (__src, __src);

  return (__m128) __builtin_ia32_gatherdiv4sf256 (__src,
                                                  __base,
                                                  (__v4di)__index,
                                                  __mask,
                                                  __scale);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_ps (__m128 __src, float const *__base,
                          __m256i __index, __m128 __mask, const int __scale)
{
  return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)__src,
                                                  __base,
                                                  (__v4di)__index,
                                                  (__v4sf)__mask,
                                                  __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_epi64 (long long int const *__base,
                     __m128i __index, const int __scale)
{
  __v2di __src = __extension__ (__v2di){ 0, 0 };
  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };

  return (__m128i) __builtin_ia32_gathersiv2di (__src,
                                                __base,
                                                (__v4si)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_epi64 (__m128i __src, long long int const *__base,
                          __m128i __index, __m128i __mask, const int __scale)
{
  return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)__src,
                                                __base,
                                                (__v4si)__index,
                                                (__v2di)__mask,
                                                __scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_epi64 (long long int const *__base,
                        __m128i __index, const int __scale)
{
  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };

  return (__m256i) __builtin_ia32_gathersiv4di (__src,
                                                __base,
                                                (__v4si)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_epi64 (__m256i __src, long long int const *__base,
                             __m128i __index, __m256i __mask,
                             const int __scale)
{
  return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)__src,
                                                __base,
                                                (__v4si)__index,
                                                (__v4di)__mask,
                                                __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_epi64 (long long int const *__base,
                     __m128i __index, const int __scale)
{
  __v2di __src = __extension__ (__v2di){ 0, 0 };
  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };

  return (__m128i) __builtin_ia32_gatherdiv2di (__src,
                                                __base,
                                                (__v2di)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_epi64 (__m128i __src, long long int const *__base,
                          __m128i __index, __m128i __mask, const int __scale)
{
  return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)__src,
                                                __base,
                                                (__v2di)__index,
                                                (__v2di)__mask,
                                                __scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_epi64 (long long int const *__base,
                        __m256i __index, const int __scale)
{
  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };

  return (__m256i) __builtin_ia32_gatherdiv4di (__src,
                                                __base,
                                                (__v4di)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_epi64 (__m256i __src, long long int const *__base,
                             __m256i __index, __m256i __mask,
                             const int __scale)
{
  return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)__src,
                                                __base,
                                                (__v4di)__index,
                                                (__v4di)__mask,
                                                __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_epi32 (int const *__base, __m128i __index, const int __scale)
{
  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };

  return (__m128i) __builtin_ia32_gathersiv4si (__src,
                                                __base,
                                                (__v4si)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_epi32 (__m128i __src, int const *__base, __m128i __index,
                          __m128i __mask, const int __scale)
{
  return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)__src,
                                                __base,
                                                (__v4si)__index,
                                                (__v4si)__mask,
                                                __scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_epi32 (int const *__base, __m256i __index, const int __scale)
{
  __v8si __src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
  __v8si __mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

  return (__m256i) __builtin_ia32_gathersiv8si (__src,
                                                __base,
                                                (__v8si)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_epi32 (__m256i __src, int const *__base,
                             __m256i __index, __m256i __mask,
                             const int __scale)
{
  return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)__src,
                                                __base,
                                                (__v8si)__index,
                                                (__v8si)__mask,
                                                __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_epi32 (int const *__base, __m128i __index, const int __scale)
{
  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };

  return (__m128i) __builtin_ia32_gatherdiv4si (__src,
                                                __base,
                                                (__v2di)__index,
                                                __mask,
                                                __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_epi32 (__m128i __src, int const *__base, __m128i __index,
                          __m128i __mask, const int __scale)
{
  return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)__src,
                                                __base,
                                                (__v2di)__index,
                                                (__v4si)__mask,
                                                __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_epi32 (int const *__base, __m256i __index, const int __scale)
{
  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };

  return (__m128i) __builtin_ia32_gatherdiv4si256 (__src,
                                                   __base,
                                                   (__v4di)__index,
                                                   __mask,
                                                   __scale);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
                             __m256i __index, __m128i __mask,
                             const int __scale)
{
  return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)__src,
                                                   __base,
                                                   (__v4di)__index,
                                                   (__v4si)__mask,
                                                   __scale);
}
#else /* __OPTIMIZE__ */
#define _mm_i32gather_pd(BASE, INDEX, SCALE) \
  (__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (), \
                                         (double const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v2df)_mm_set1_pd( \
                                           (double)(long long int) -1), \
                                         (int)SCALE)

#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)SRC, \
                                         (double const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v2df)(__m128d)MASK, \
                                         (int)SCALE)

#define _mm256_i32gather_pd(BASE, INDEX, SCALE) \
  (__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (), \
                                         (double const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v4df)_mm256_set1_pd( \
                                           (double)(long long int) -1), \
                                         (int)SCALE)

#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)SRC, \
                                         (double const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v4df)(__m256d)MASK, \
                                         (int)SCALE)

#define _mm_i64gather_pd(BASE, INDEX, SCALE) \
  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (), \
                                         (double const *)BASE, \
                                         (__v2di)(__m128i)INDEX, \
                                         (__v2df)_mm_set1_pd( \
                                           (double)(long long int) -1), \
                                         (int)SCALE)

#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)SRC, \
                                         (double const *)BASE, \
                                         (__v2di)(__m128i)INDEX, \
                                         (__v2df)(__m128d)MASK, \
                                         (int)SCALE)

#define _mm256_i64gather_pd(BASE, INDEX, SCALE) \
  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (), \
                                         (double const *)BASE, \
                                         (__v4di)(__m256i)INDEX, \
                                         (__v4df)_mm256_set1_pd( \
                                           (double)(long long int) -1), \
                                         (int)SCALE)

#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)SRC, \
                                         (double const *)BASE, \
                                         (__v4di)(__m256i)INDEX, \
                                         (__v4df)(__m256d)MASK, \
                                         (int)SCALE)

#define _mm_i32gather_ps(BASE, INDEX, SCALE) \
  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (), \
                                        (float const *)BASE, \
                                        (__v4si)(__m128i)INDEX, \
                                        _mm_set1_ps ((float)(int) -1), \
                                        (int)SCALE)

#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)SRC, \
                                        (float const *)BASE, \
                                        (__v4si)(__m128i)INDEX, \
                                        (__v4sf)(__m128)MASK, \
                                        (int)SCALE)

#define _mm256_i32gather_ps(BASE, INDEX, SCALE) \
  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
                                        (float const *)BASE, \
                                        (__v8si)(__m256i)INDEX, \
                                        (__v8sf)_mm256_set1_ps ( \
                                          (float)(int) -1), \
                                        (int)SCALE)

#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC, \
                                        (float const *)BASE, \
                                        (__v8si)(__m256i)INDEX, \
                                        (__v8sf)(__m256)MASK, \
                                        (int)SCALE)

#define _mm_i64gather_ps(BASE, INDEX, SCALE) \
  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_ps (), \
                                        (float const *)BASE, \
                                        (__v2di)(__m128i)INDEX, \
                                        (__v4sf)_mm_set1_ps ( \
                                          (float)(int) -1), \
                                        (int)SCALE)

#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC, \
                                        (float const *)BASE, \
                                        (__v2di)(__m128i)INDEX, \
                                        (__v4sf)(__m128)MASK, \
                                        (int)SCALE)

#define _mm256_i64gather_ps(BASE, INDEX, SCALE) \
  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (), \
                                           (float const *)BASE, \
                                           (__v4di)(__m256i)INDEX, \
                                           (__v4sf)_mm_set1_ps( \
                                             (float)(int) -1), \
                                           (int)SCALE)

#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)SRC, \
                                           (float const *)BASE, \
                                           (__v4di)(__m256i)INDEX, \
                                           (__v4sf)(__m128)MASK, \
                                           (int)SCALE)

#define _mm_i32gather_epi64(BASE, INDEX, SCALE) \
  (__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \
                                         (long long const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v2di)_mm_set1_epi64x (-1), \
                                         (int)SCALE)

#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)SRC, \
                                         (long long const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v2di)(__m128i)MASK, \
                                         (int)SCALE)

#define _mm256_i32gather_epi64(BASE, INDEX, SCALE) \
  (__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \
                                         (long long const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v4di)_mm256_set1_epi64x (-1), \
                                         (int)SCALE)

#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)SRC, \
                                         (long long const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v4di)(__m256i)MASK, \
                                         (int)SCALE)

#define _mm_i64gather_epi64(BASE, INDEX, SCALE) \
  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \
                                         (long long const *)BASE, \
                                         (__v2di)(__m128i)INDEX, \
                                         (__v2di)_mm_set1_epi64x (-1), \
                                         (int)SCALE)

#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)SRC, \
                                         (long long const *)BASE, \
                                         (__v2di)(__m128i)INDEX, \
                                         (__v2di)(__m128i)MASK, \
                                         (int)SCALE)

#define _mm256_i64gather_epi64(BASE, INDEX, SCALE) \
  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \
                                         (long long const *)BASE, \
                                         (__v4di)(__m256i)INDEX, \
                                         (__v4di)_mm256_set1_epi64x (-1), \
                                         (int)SCALE)

#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)SRC, \
                                         (long long const *)BASE, \
                                         (__v4di)(__m256i)INDEX, \
                                         (__v4di)(__m256i)MASK, \
                                         (int)SCALE)

#define _mm_i32gather_epi32(BASE, INDEX, SCALE) \
  (__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (), \
                                         (int const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v4si)_mm_set1_epi32 (-1), \
                                         (int)SCALE)

#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)SRC, \
                                         (int const *)BASE, \
                                         (__v4si)(__m128i)INDEX, \
                                         (__v4si)(__m128i)MASK, \
                                         (int)SCALE)

#define _mm256_i32gather_epi32(BASE, INDEX, SCALE) \
  (__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \
                                         (int const *)BASE, \
                                         (__v8si)(__m256i)INDEX, \
                                         (__v8si)_mm256_set1_epi32 (-1), \
                                         (int)SCALE)

#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)SRC, \
                                         (int const *)BASE, \
                                         (__v8si)(__m256i)INDEX, \
                                         (__v8si)(__m256i)MASK, \
                                         (int)SCALE)

#define _mm_i64gather_epi32(BASE, INDEX, SCALE) \
  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (), \
                                         (int const *)BASE, \
                                         (__v2di)(__m128i)INDEX, \
                                         (__v4si)_mm_set1_epi32 (-1), \
                                         (int)SCALE)

#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)SRC, \
                                         (int const *)BASE, \
                                         (__v2di)(__m128i)INDEX, \
                                         (__v4si)(__m128i)MASK, \
                                         (int)SCALE)

#define _mm256_i64gather_epi32(BASE, INDEX, SCALE) \
  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \
                                            (int const *)BASE, \
                                            (__v4di)(__m256i)INDEX, \
                                            (__v4si)_mm_set1_epi32(-1), \
                                            (int)SCALE)

#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)SRC, \
                                            (int const *)BASE, \
                                            (__v4di)(__m256i)INDEX, \
                                            (__v4si)(__m128i)MASK, \
                                            (int)SCALE)
#endif /* __OPTIMIZE__ */
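
/* When __OPTIMIZE__ is not defined, the gather intrinsics above are
   provided as macros rather than inline functions so that the scale
   argument reaches the builtin as a constant expression; the instructions
   only accept a scale of 1, 2, 4 or 8.  An illustrative call such as

     __m256i v = _mm256_i32gather_epi32 (table, idx, sizeof (int));

   (with table and idx supplied by the caller) works in either mode,
   whereas a scale that is only known at run time does not.  */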

#ifdef __DISABLE_AVX2__
#undef __DISABLE_AVX2__
#pragma GCC pop_options
#endif /* __DISABLE_AVX2__ */

#endif /* _AVX2INTRIN_H_INCLUDED */