]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/emmintrin.h
PR target/87853
[thirdparty/gcc.git] / gcc / config / i386 / emmintrin.h
CommitLineData
fbd26352 1/* Copyright (C) 2003-2019 Free Software Foundation, Inc.
e829311e 2
ccd321d7 3 This file is part of GCC.
e829311e 4
ccd321d7 5 GCC is free software; you can redistribute it and/or modify
e829311e 6 it under the terms of the GNU General Public License as published by
6bc9506f 7 the Free Software Foundation; either version 3, or (at your option)
e829311e 8 any later version.
9
ccd321d7 10 GCC is distributed in the hope that it will be useful,
e829311e 11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
6bc9506f 15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
e829311e 23
24/* Implemented from the specification included in the Intel C++ Compiler
52fdc46e 25 User Guide and Reference, version 9.0. */
e829311e 26
27#ifndef _EMMINTRIN_H_INCLUDED
28#define _EMMINTRIN_H_INCLUDED
29
/* We need definitions from the SSE header files. */
e829311e 31#include <xmmintrin.h>
32
ef21d40e 33#ifndef __SSE2__
34#pragma GCC push_options
35#pragma GCC target("sse2")
36#define __DISABLE_SSE2__
37#endif /* __SSE2__ */
38
e829311e 39/* SSE2 */
/* Internal 16-byte vector types, one per element type.  The intrinsics
   below cast their __m128i/__m128d operands to these before operating
   on them or handing them to builtins. */
aff6787f 40typedef double __v2df __attribute__ ((__vector_size__ (16)));
41typedef long long __v2di __attribute__ ((__vector_size__ (16)));
d521a5b2 42typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
aff6787f 43typedef int __v4si __attribute__ ((__vector_size__ (16)));
d521a5b2 44typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
aff6787f 45typedef short __v8hi __attribute__ ((__vector_size__ (16)));
d521a5b2 46typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
aff6787f 47typedef char __v16qi __attribute__ ((__vector_size__ (16)));
fba369f3 48typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
d521a5b2 49typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));
e829311e 50
0e960ba8 51/* The Intel API is flexible enough that we must allow aliasing with other
52 vector types, and their scalar components. */
/* Public 16-byte integer vector type, declared as a vector of long long. */
53typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
/* Public 16-byte vector of two doubles. */
54typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
ad2c46cf 55
8036ac7f 56/* Unaligned version of the same types. */
/* Same layout as __m128i, but declared with 1-byte alignment; used by the
   unaligned integer load/store intrinsics. */
57typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
/* Same layout as __m128d, but declared with 1-byte alignment; used by
   _mm_loadu_pd and _mm_storeu_pd. */
58typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
59
/* Build the 2-bit selector used with the SHUFPD instruction:
   FP1 is placed in bit 1 and FP0 in bit 0. */
#define _MM_SHUFFLE2(fp1,fp0) (((fp1) << 1) | (fp0))
63
ad2c46cf 64/* Create a vector with element 0 as F and the rest zero. */
517b0286 65extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 66_mm_set_sd (double __F)
67{
712fea20 68 return __extension__ (__m128d){ __F, 0.0 };
ad2c46cf 69}
e829311e 70
ad2c46cf 71/* Create a vector with both elements equal to F. */
517b0286 72extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 73_mm_set1_pd (double __F)
e829311e 74{
882b157f 75 return __extension__ (__m128d){ __F, __F };
e829311e 76}
77
517b0286 78extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 79_mm_set_pd1 (double __F)
e829311e 80{
ad2c46cf 81 return _mm_set1_pd (__F);
e829311e 82}
83
ad2c46cf 84/* Create a vector with the lower value X and upper value W. */
517b0286 85extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 86_mm_set_pd (double __W, double __X)
e829311e 87{
882b157f 88 return __extension__ (__m128d){ __X, __W };
e829311e 89}
90
ad2c46cf 91/* Create a vector with the lower value W and upper value X. */
517b0286 92extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 93_mm_setr_pd (double __W, double __X)
e829311e 94{
882b157f 95 return __extension__ (__m128d){ __W, __X };
e829311e 96}
97
0fc245cd 98/* Create an undefined vector. */
99extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
100_mm_undefined_pd (void)
101{
102 __m128d __Y = __Y;
103 return __Y;
104}
105
ad2c46cf 106/* Create a vector of zeros. */
517b0286 107extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 108_mm_setzero_pd (void)
e829311e 109{
882b157f 110 return __extension__ (__m128d){ 0.0, 0.0 };
e829311e 111}
112
ad2c46cf 113/* Sets the low DPFP value of A from the low value of B. */
517b0286 114extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 115_mm_move_sd (__m128d __A, __m128d __B)
e829311e 116{
eb1258b0 117 return __extension__ (__m128d) __builtin_shuffle ((__v2df)__A, (__v2df)__B, (__v2di){2, 1});
e829311e 118}
119
ad2c46cf 120/* Load two DPFP values from P. The address must be 16-byte aligned. */
517b0286 121extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 122_mm_load_pd (double const *__P)
e829311e 123{
ad2c46cf 124 return *(__m128d *)__P;
e829311e 125}
126
ad2c46cf 127/* Load two DPFP values from P. The address need not be 16-byte aligned. */
517b0286 128extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 129_mm_loadu_pd (double const *__P)
e829311e 130{
8036ac7f 131 return *(__m128d_u *)__P;
e829311e 132}
133
ad2c46cf 134/* Create a vector with all two elements equal to *P. */
517b0286 135extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 136_mm_load1_pd (double const *__P)
e829311e 137{
ad2c46cf 138 return _mm_set1_pd (*__P);
e829311e 139}
140
ad2c46cf 141/* Create a vector with element 0 as *P and the rest zero. */
517b0286 142extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 143_mm_load_sd (double const *__P)
e829311e 144{
ad2c46cf 145 return _mm_set_sd (*__P);
e829311e 146}
147
517b0286 148extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 149_mm_load_pd1 (double const *__P)
e829311e 150{
ad2c46cf 151 return _mm_load1_pd (__P);
e829311e 152}
153
ad2c46cf 154/* Load two DPFP values in reverse order. The address must be aligned. */
517b0286 155extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 156_mm_loadr_pd (double const *__P)
e829311e 157{
ad2c46cf 158 __m128d __tmp = _mm_load_pd (__P);
159 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
160}
161
162/* Store two DPFP values. The address must be 16-byte aligned. */
517b0286 163extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 164_mm_store_pd (double *__P, __m128d __A)
165{
166 *(__m128d *)__P = __A;
167}
168
169/* Store two DPFP values. The address need not be 16-byte aligned. */
517b0286 170extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 171_mm_storeu_pd (double *__P, __m128d __A)
172{
8036ac7f 173 *(__m128d_u *)__P = __A;
e829311e 174}
175
176/* Stores the lower DPFP value. */
517b0286 177extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 178_mm_store_sd (double *__P, __m128d __A)
179{
d521a5b2 180 *__P = ((__v2df)__A)[0];
e829311e 181}
182
517b0286 183extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
52fdc46e 184_mm_cvtsd_f64 (__m128d __A)
185{
d521a5b2 186 return ((__v2df)__A)[0];
52fdc46e 187}
188
517b0286 189extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 190_mm_storel_pd (double *__P, __m128d __A)
e829311e 191{
ad2c46cf 192 _mm_store_sd (__P, __A);
e829311e 193}
194
ad2c46cf 195/* Stores the upper DPFP value. */
517b0286 196extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 197_mm_storeh_pd (double *__P, __m128d __A)
e829311e 198{
d521a5b2 199 *__P = ((__v2df)__A)[1];
e829311e 200}
201
ad2c46cf 202/* Store the lower DPFP value across two words.
203 The address must be 16-byte aligned. */
517b0286 204extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 205_mm_store1_pd (double *__P, __m128d __A)
e829311e 206{
ad2c46cf 207 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
e829311e 208}
209
517b0286 210extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 211_mm_store_pd1 (double *__P, __m128d __A)
e829311e 212{
ad2c46cf 213 _mm_store1_pd (__P, __A);
e829311e 214}
215
216/* Store two DPFP values in reverse order. The address must be aligned. */
517b0286 217extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 218_mm_storer_pd (double *__P, __m128d __A)
219{
ad2c46cf 220 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
e829311e 221}
222
517b0286 223extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 224_mm_cvtsi128_si32 (__m128i __A)
225{
e3807cc7 226 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
5802c0cb 227}
228
229#ifdef __x86_64__
52fdc46e 230/* Intel intrinsic. */
517b0286 231extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
52fdc46e 232_mm_cvtsi128_si64 (__m128i __A)
233{
d521a5b2 234 return ((__v2di)__A)[0];
52fdc46e 235}
236
237/* Microsoft intrinsic. */
517b0286 238extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 239_mm_cvtsi128_si64x (__m128i __A)
240{
d521a5b2 241 return ((__v2di)__A)[0];
5802c0cb 242}
243#endif
244
517b0286 245extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 246_mm_add_pd (__m128d __A, __m128d __B)
247{
d521a5b2 248 return (__m128d) ((__v2df)__A + (__v2df)__B);
e829311e 249}
250
517b0286 251extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 252_mm_add_sd (__m128d __A, __m128d __B)
253{
254 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
255}
256
517b0286 257extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 258_mm_sub_pd (__m128d __A, __m128d __B)
259{
d521a5b2 260 return (__m128d) ((__v2df)__A - (__v2df)__B);
e829311e 261}
262
517b0286 263extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 264_mm_sub_sd (__m128d __A, __m128d __B)
265{
266 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
267}
268
517b0286 269extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 270_mm_mul_pd (__m128d __A, __m128d __B)
271{
d521a5b2 272 return (__m128d) ((__v2df)__A * (__v2df)__B);
e829311e 273}
274
517b0286 275extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 276_mm_mul_sd (__m128d __A, __m128d __B)
277{
278 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
279}
280
517b0286 281extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 282_mm_div_pd (__m128d __A, __m128d __B)
283{
d521a5b2 284 return (__m128d) ((__v2df)__A / (__v2df)__B);
e829311e 285}
286
517b0286 287extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 288_mm_div_sd (__m128d __A, __m128d __B)
289{
290 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
291}
292
517b0286 293extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 294_mm_sqrt_pd (__m128d __A)
295{
296 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
297}
298
d521a5b2 299/* Return pair {sqrt (B[0]), A[1]}. */
517b0286 300extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 301_mm_sqrt_sd (__m128d __A, __m128d __B)
302{
303 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
304 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
305}
306
517b0286 307extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 308_mm_min_pd (__m128d __A, __m128d __B)
309{
310 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
311}
312
517b0286 313extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 314_mm_min_sd (__m128d __A, __m128d __B)
315{
316 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
317}
318
517b0286 319extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 320_mm_max_pd (__m128d __A, __m128d __B)
321{
322 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
323}
324
517b0286 325extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 326_mm_max_sd (__m128d __A, __m128d __B)
327{
328 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
329}
330
517b0286 331extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 332_mm_and_pd (__m128d __A, __m128d __B)
333{
334 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
335}
336
517b0286 337extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 338_mm_andnot_pd (__m128d __A, __m128d __B)
339{
340 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
341}
342
517b0286 343extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 344_mm_or_pd (__m128d __A, __m128d __B)
345{
346 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
347}
348
517b0286 349extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 350_mm_xor_pd (__m128d __A, __m128d __B)
351{
352 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
353}
354
517b0286 355extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 356_mm_cmpeq_pd (__m128d __A, __m128d __B)
357{
358 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
359}
360
517b0286 361extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 362_mm_cmplt_pd (__m128d __A, __m128d __B)
363{
364 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
365}
366
517b0286 367extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 368_mm_cmple_pd (__m128d __A, __m128d __B)
369{
370 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
371}
372
517b0286 373extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 374_mm_cmpgt_pd (__m128d __A, __m128d __B)
375{
376 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
377}
378
517b0286 379extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 380_mm_cmpge_pd (__m128d __A, __m128d __B)
381{
382 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
383}
384
517b0286 385extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 386_mm_cmpneq_pd (__m128d __A, __m128d __B)
387{
388 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
389}
390
517b0286 391extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 392_mm_cmpnlt_pd (__m128d __A, __m128d __B)
393{
394 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
395}
396
517b0286 397extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 398_mm_cmpnle_pd (__m128d __A, __m128d __B)
399{
400 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
401}
402
517b0286 403extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 404_mm_cmpngt_pd (__m128d __A, __m128d __B)
405{
406 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
407}
408
517b0286 409extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 410_mm_cmpnge_pd (__m128d __A, __m128d __B)
411{
412 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
413}
414
517b0286 415extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 416_mm_cmpord_pd (__m128d __A, __m128d __B)
417{
418 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
419}
420
517b0286 421extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 422_mm_cmpunord_pd (__m128d __A, __m128d __B)
423{
424 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
425}
426
517b0286 427extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 428_mm_cmpeq_sd (__m128d __A, __m128d __B)
429{
430 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
431}
432
517b0286 433extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 434_mm_cmplt_sd (__m128d __A, __m128d __B)
435{
436 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
437}
438
517b0286 439extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 440_mm_cmple_sd (__m128d __A, __m128d __B)
441{
442 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
443}
444
517b0286 445extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 446_mm_cmpgt_sd (__m128d __A, __m128d __B)
447{
448 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
449 (__v2df)
450 __builtin_ia32_cmpltsd ((__v2df) __B,
451 (__v2df)
452 __A));
453}
454
517b0286 455extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 456_mm_cmpge_sd (__m128d __A, __m128d __B)
457{
458 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
459 (__v2df)
460 __builtin_ia32_cmplesd ((__v2df) __B,
461 (__v2df)
462 __A));
463}
464
517b0286 465extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 466_mm_cmpneq_sd (__m128d __A, __m128d __B)
467{
468 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
469}
470
517b0286 471extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 472_mm_cmpnlt_sd (__m128d __A, __m128d __B)
473{
474 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
475}
476
517b0286 477extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 478_mm_cmpnle_sd (__m128d __A, __m128d __B)
479{
480 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
481}
482
517b0286 483extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 484_mm_cmpngt_sd (__m128d __A, __m128d __B)
485{
486 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
487 (__v2df)
488 __builtin_ia32_cmpnltsd ((__v2df) __B,
489 (__v2df)
490 __A));
491}
492
517b0286 493extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 494_mm_cmpnge_sd (__m128d __A, __m128d __B)
495{
496 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
497 (__v2df)
498 __builtin_ia32_cmpnlesd ((__v2df) __B,
499 (__v2df)
500 __A));
501}
502
517b0286 503extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 504_mm_cmpord_sd (__m128d __A, __m128d __B)
505{
506 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
507}
508
517b0286 509extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 510_mm_cmpunord_sd (__m128d __A, __m128d __B)
511{
512 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
513}
514
517b0286 515extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 516_mm_comieq_sd (__m128d __A, __m128d __B)
517{
518 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
519}
520
517b0286 521extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 522_mm_comilt_sd (__m128d __A, __m128d __B)
523{
524 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
525}
526
517b0286 527extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 528_mm_comile_sd (__m128d __A, __m128d __B)
529{
530 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
531}
532
517b0286 533extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 534_mm_comigt_sd (__m128d __A, __m128d __B)
535{
536 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
537}
538
517b0286 539extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 540_mm_comige_sd (__m128d __A, __m128d __B)
541{
542 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
543}
544
517b0286 545extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 546_mm_comineq_sd (__m128d __A, __m128d __B)
547{
548 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
549}
550
517b0286 551extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 552_mm_ucomieq_sd (__m128d __A, __m128d __B)
553{
554 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
555}
556
517b0286 557extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 558_mm_ucomilt_sd (__m128d __A, __m128d __B)
559{
560 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
561}
562
517b0286 563extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 564_mm_ucomile_sd (__m128d __A, __m128d __B)
565{
566 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
567}
568
517b0286 569extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 570_mm_ucomigt_sd (__m128d __A, __m128d __B)
571{
572 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
573}
574
517b0286 575extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 576_mm_ucomige_sd (__m128d __A, __m128d __B)
577{
578 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
579}
580
517b0286 581extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 582_mm_ucomineq_sd (__m128d __A, __m128d __B)
583{
584 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
585}
586
ad2c46cf 587/* Create a vector of Qi, where i is the element number. */
e829311e 588
517b0286 589extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 590_mm_set_epi64x (long long __q1, long long __q0)
e829311e 591{
882b157f 592 return __extension__ (__m128i)(__v2di){ __q0, __q1 };
e829311e 593}
594
517b0286 595extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 596_mm_set_epi64 (__m64 __q1, __m64 __q0)
e829311e 597{
ad2c46cf 598 return _mm_set_epi64x ((long long)__q1, (long long)__q0);
e829311e 599}
600
517b0286 601extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 602_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
e829311e 603{
882b157f 604 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
e829311e 605}
606
517b0286 607extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 608_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
609 short __q3, short __q2, short __q1, short __q0)
e829311e 610{
882b157f 611 return __extension__ (__m128i)(__v8hi){
612 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
e829311e 613}
614
517b0286 615extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 616_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
617 char __q11, char __q10, char __q09, char __q08,
618 char __q07, char __q06, char __q05, char __q04,
619 char __q03, char __q02, char __q01, char __q00)
e829311e 620{
882b157f 621 return __extension__ (__m128i)(__v16qi){
ad2c46cf 622 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
623 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
624 };
e829311e 625}
626
ad2c46cf 627/* Set all of the elements of the vector to A. */
e829311e 628
517b0286 629extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 630_mm_set1_epi64x (long long __A)
e829311e 631{
ad2c46cf 632 return _mm_set_epi64x (__A, __A);
e829311e 633}
634
517b0286 635extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 636_mm_set1_epi64 (__m64 __A)
e829311e 637{
ad2c46cf 638 return _mm_set_epi64 (__A, __A);
e829311e 639}
640
517b0286 641extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 642_mm_set1_epi32 (int __A)
e829311e 643{
ad2c46cf 644 return _mm_set_epi32 (__A, __A, __A, __A);
e829311e 645}
646
517b0286 647extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 648_mm_set1_epi16 (short __A)
e829311e 649{
ad2c46cf 650 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
e829311e 651}
652
517b0286 653extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 654_mm_set1_epi8 (char __A)
e829311e 655{
ad2c46cf 656 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
657 __A, __A, __A, __A, __A, __A, __A, __A);
658}
e829311e 659
ad2c46cf 660/* Create a vector of Qi, where i is the element number.
661 The parameter order is reversed from the _mm_set_epi* functions. */
e829311e 662
517b0286 663extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 664_mm_setr_epi64 (__m64 __q0, __m64 __q1)
665{
666 return _mm_set_epi64 (__q1, __q0);
e829311e 667}
668
517b0286 669extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 670_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
e829311e 671{
ad2c46cf 672 return _mm_set_epi32 (__q3, __q2, __q1, __q0);
e829311e 673}
e829311e 674
517b0286 675extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 676_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
677 short __q4, short __q5, short __q6, short __q7)
e829311e 678{
ad2c46cf 679 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
e829311e 680}
681
517b0286 682extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 683_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
684 char __q04, char __q05, char __q06, char __q07,
685 char __q08, char __q09, char __q10, char __q11,
686 char __q12, char __q13, char __q14, char __q15)
e829311e 687{
ad2c46cf 688 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
689 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
690}
e829311e 691
/* Integer vector loads. _mm_loadl_epi64 creates a vector with element 0 as *P and the rest zero. */
e829311e 693
517b0286 694extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 695_mm_load_si128 (__m128i const *__P)
696{
697 return *__P;
e829311e 698}
699
517b0286 700extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8036ac7f 701_mm_loadu_si128 (__m128i_u const *__P)
e829311e 702{
8036ac7f 703 return *__P;
e829311e 704}
705
517b0286 706extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4eff5620 707_mm_loadl_epi64 (__m128i_u const *__P)
e829311e 708{
4eff5620 709 return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P);
e829311e 710}
711
a6fed30b 712extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
713_mm_loadu_si64 (void const *__P)
714{
715 return _mm_loadl_epi64 ((__m128i_u *)__P);
716}
717
517b0286 718extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 719_mm_store_si128 (__m128i *__P, __m128i __B)
e829311e 720{
ad2c46cf 721 *__P = __B;
e829311e 722}
e829311e 723
517b0286 724extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8036ac7f 725_mm_storeu_si128 (__m128i_u *__P, __m128i __B)
e829311e 726{
8036ac7f 727 *__P = __B;
e829311e 728}
729
517b0286 730extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4eff5620 731_mm_storel_epi64 (__m128i_u *__P, __m128i __B)
e829311e 732{
4eff5620 733 *(__m64_u *)__P = (__m64) ((__v2di)__B)[0];
e829311e 734}
735
a6fed30b 736extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
737_mm_storeu_si64 (void *__P, __m128i __B)
738{
739 _mm_storel_epi64 ((__m128i_u *)__P, __B);
740}
741
517b0286 742extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 743_mm_movepi64_pi64 (__m128i __B)
e829311e 744{
d521a5b2 745 return (__m64) ((__v2di)__B)[0];
e829311e 746}
747
517b0286 748extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 749_mm_movpi64_epi64 (__m64 __A)
e829311e 750{
ad2c46cf 751 return _mm_set_epi64 ((__m64)0LL, __A);
e829311e 752}
ad2c46cf 753
517b0286 754extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 755_mm_move_epi64 (__m128i __A)
e829311e 756{
c578ed7f 757 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
e829311e 758}
759
0fc245cd 760/* Create an undefined vector. */
761extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
762_mm_undefined_si128 (void)
763{
764 __m128i __Y = __Y;
765 return __Y;
766}
767
ad2c46cf 768/* Create a vector of zeros. */
517b0286 769extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
ad2c46cf 770_mm_setzero_si128 (void)
e829311e 771{
882b157f 772 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
e829311e 773}
774
517b0286 775extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 776_mm_cvtepi32_pd (__m128i __A)
777{
778 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
779}
780
517b0286 781extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 782_mm_cvtepi32_ps (__m128i __A)
783{
784 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
785}
786
517b0286 787extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 788_mm_cvtpd_epi32 (__m128d __A)
789{
790 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
791}
792
517b0286 793extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 794_mm_cvtpd_pi32 (__m128d __A)
795{
796 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
797}
798
517b0286 799extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 800_mm_cvtpd_ps (__m128d __A)
801{
802 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
803}
804
517b0286 805extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 806_mm_cvttpd_epi32 (__m128d __A)
807{
808 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
809}
810
517b0286 811extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 812_mm_cvttpd_pi32 (__m128d __A)
813{
814 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
815}
816
517b0286 817extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 818_mm_cvtpi32_pd (__m64 __A)
819{
820 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
821}
822
517b0286 823extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 824_mm_cvtps_epi32 (__m128 __A)
825{
826 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
827}
828
517b0286 829extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 830_mm_cvttps_epi32 (__m128 __A)
831{
832 return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
833}
834
517b0286 835extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 836_mm_cvtps_pd (__m128 __A)
837{
838 return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
839}
840
517b0286 841extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 842_mm_cvtsd_si32 (__m128d __A)
843{
844 return __builtin_ia32_cvtsd2si ((__v2df) __A);
845}
846
847#ifdef __x86_64__
52fdc46e 848/* Intel intrinsic. */
517b0286 849extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
52fdc46e 850_mm_cvtsd_si64 (__m128d __A)
851{
852 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
853}
854
855/* Microsoft intrinsic. */
517b0286 856extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 857_mm_cvtsd_si64x (__m128d __A)
858{
859 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
860}
861#endif
862
517b0286 863extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 864_mm_cvttsd_si32 (__m128d __A)
865{
866 return __builtin_ia32_cvttsd2si ((__v2df) __A);
867}
868
869#ifdef __x86_64__
52fdc46e 870/* Intel intrinsic. */
517b0286 871extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
52fdc46e 872_mm_cvttsd_si64 (__m128d __A)
873{
874 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
875}
876
877/* Microsoft intrinsic. */
517b0286 878extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 879_mm_cvttsd_si64x (__m128d __A)
880{
881 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
882}
883#endif
884
517b0286 885extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 886_mm_cvtsd_ss (__m128 __A, __m128d __B)
887{
888 return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
889}
890
517b0286 891extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 892_mm_cvtsi32_sd (__m128d __A, int __B)
893{
894 return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
895}
896
897#ifdef __x86_64__
52fdc46e 898/* Intel intrinsic. */
517b0286 899extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
52fdc46e 900_mm_cvtsi64_sd (__m128d __A, long long __B)
901{
902 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
903}
904
905/* Microsoft intrinsic. */
517b0286 906extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 907_mm_cvtsi64x_sd (__m128d __A, long long __B)
908{
909 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
910}
911#endif
912
517b0286 913extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 914_mm_cvtss_sd (__m128d __A, __m128 __B)
915{
916 return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
917}
918
#ifdef __OPTIMIZE__
/* Call __builtin_ia32_shufpd on __A and __B (both viewed as __v2df)
   with __mask as the selector operand. The inline form is provided
   only when __OPTIMIZE__ is defined; otherwise the macro below expands
   to the same builtin call. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  __v2df __va = (__v2df)__A;
  __v2df __vb = (__v2df)__B;
  /* __mask is handed to the builtin directly, not through a local. */
  return (__m128d)__builtin_ia32_shufpd (__va, __vb, __mask);
}
#else
#define _mm_shuffle_pd(A, B, N) \
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \
				   (__v2df)(__m128d)(B), (int)(N)))
#endif
e829311e 930
517b0286 931extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 932_mm_unpackhi_pd (__m128d __A, __m128d __B)
933{
934 return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
935}
936
517b0286 937extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 938_mm_unpacklo_pd (__m128d __A, __m128d __B)
939{
940 return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
941}
942
517b0286 943extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 944_mm_loadh_pd (__m128d __A, double const *__B)
945{
f4d20c2a 946 return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
e829311e 947}
948
517b0286 949extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 950_mm_loadl_pd (__m128d __A, double const *__B)
951{
f4d20c2a 952 return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
e829311e 953}
954
517b0286 955extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 956_mm_movemask_pd (__m128d __A)
957{
958 return __builtin_ia32_movmskpd ((__v2df)__A);
959}
960
517b0286 961extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 962_mm_packs_epi16 (__m128i __A, __m128i __B)
963{
964 return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
965}
966
517b0286 967extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 968_mm_packs_epi32 (__m128i __A, __m128i __B)
969{
970 return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
971}
972
517b0286 973extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 974_mm_packus_epi16 (__m128i __A, __m128i __B)
975{
976 return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
977}
978
517b0286 979extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 980_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
981{
982 return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
983}
984
517b0286 985extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 986_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
987{
988 return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
989}
990
517b0286 991extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 992_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
993{
994 return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
995}
996
517b0286 997extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 998_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
999{
1000 return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
1001}
1002
517b0286 1003extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1004_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
1005{
1006 return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
1007}
1008
517b0286 1009extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1010_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
1011{
1012 return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
1013}
1014
517b0286 1015extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1016_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
1017{
1018 return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
1019}
1020
517b0286 1021extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1022_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
1023{
1024 return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
1025}
1026
517b0286 1027extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1028_mm_add_epi8 (__m128i __A, __m128i __B)
1029{
d521a5b2 1030 return (__m128i) ((__v16qu)__A + (__v16qu)__B);
e829311e 1031}
1032
517b0286 1033extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1034_mm_add_epi16 (__m128i __A, __m128i __B)
1035{
d521a5b2 1036 return (__m128i) ((__v8hu)__A + (__v8hu)__B);
e829311e 1037}
1038
517b0286 1039extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1040_mm_add_epi32 (__m128i __A, __m128i __B)
1041{
d521a5b2 1042 return (__m128i) ((__v4su)__A + (__v4su)__B);
e829311e 1043}
1044
517b0286 1045extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1046_mm_add_epi64 (__m128i __A, __m128i __B)
1047{
d521a5b2 1048 return (__m128i) ((__v2du)__A + (__v2du)__B);
e829311e 1049}
1050
517b0286 1051extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1052_mm_adds_epi8 (__m128i __A, __m128i __B)
1053{
1054 return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1055}
1056
517b0286 1057extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1058_mm_adds_epi16 (__m128i __A, __m128i __B)
1059{
1060 return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1061}
1062
517b0286 1063extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1064_mm_adds_epu8 (__m128i __A, __m128i __B)
1065{
1066 return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1067}
1068
517b0286 1069extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1070_mm_adds_epu16 (__m128i __A, __m128i __B)
1071{
1072 return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1073}
1074
517b0286 1075extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1076_mm_sub_epi8 (__m128i __A, __m128i __B)
1077{
d521a5b2 1078 return (__m128i) ((__v16qu)__A - (__v16qu)__B);
e829311e 1079}
1080
517b0286 1081extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1082_mm_sub_epi16 (__m128i __A, __m128i __B)
1083{
d521a5b2 1084 return (__m128i) ((__v8hu)__A - (__v8hu)__B);
e829311e 1085}
1086
517b0286 1087extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1088_mm_sub_epi32 (__m128i __A, __m128i __B)
1089{
d521a5b2 1090 return (__m128i) ((__v4su)__A - (__v4su)__B);
e829311e 1091}
1092
517b0286 1093extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1094_mm_sub_epi64 (__m128i __A, __m128i __B)
1095{
d521a5b2 1096 return (__m128i) ((__v2du)__A - (__v2du)__B);
e829311e 1097}
1098
517b0286 1099extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1100_mm_subs_epi8 (__m128i __A, __m128i __B)
1101{
1102 return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1103}
1104
517b0286 1105extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1106_mm_subs_epi16 (__m128i __A, __m128i __B)
1107{
1108 return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1109}
1110
517b0286 1111extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1112_mm_subs_epu8 (__m128i __A, __m128i __B)
1113{
1114 return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1115}
1116
517b0286 1117extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1118_mm_subs_epu16 (__m128i __A, __m128i __B)
1119{
1120 return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1121}
1122
517b0286 1123extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1124_mm_madd_epi16 (__m128i __A, __m128i __B)
1125{
1126 return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1127}
1128
517b0286 1129extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1130_mm_mulhi_epi16 (__m128i __A, __m128i __B)
1131{
1132 return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1133}
1134
517b0286 1135extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1136_mm_mullo_epi16 (__m128i __A, __m128i __B)
1137{
d521a5b2 1138 return (__m128i) ((__v8hu)__A * (__v8hu)__B);
e829311e 1139}
1140
517b0286 1141extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1142_mm_mul_su32 (__m64 __A, __m64 __B)
1143{
1144 return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1145}
1146
517b0286 1147extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1148_mm_mul_epu32 (__m128i __A, __m128i __B)
1149{
1150 return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1151}
1152
517b0286 1153extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8071f7e8 1154_mm_slli_epi16 (__m128i __A, int __B)
e829311e 1155{
5802c0cb 1156 return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
e829311e 1157}
1158
517b0286 1159extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8071f7e8 1160_mm_slli_epi32 (__m128i __A, int __B)
e829311e 1161{
5802c0cb 1162 return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
e829311e 1163}
1164
517b0286 1165extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8071f7e8 1166_mm_slli_epi64 (__m128i __A, int __B)
e829311e 1167{
5802c0cb 1168 return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
e829311e 1169}
1170
517b0286 1171extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8071f7e8 1172_mm_srai_epi16 (__m128i __A, int __B)
e829311e 1173{
5802c0cb 1174 return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
e829311e 1175}
1176
517b0286 1177extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8071f7e8 1178_mm_srai_epi32 (__m128i __A, int __B)
e829311e 1179{
5802c0cb 1180 return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
e829311e 1181}
1182
#ifdef __OPTIMIZE__
/* Whole-register shifts with the count given as __N. Each wrapper
   multiplies __N by 8 before handing it to the psrldqi128 / pslldqi128
   builtin. _mm_bsrli_si128 and _mm_srli_si128 have identical bodies,
   as do _mm_bslli_si128 and _mm_slli_si128. The inline forms exist
   only when __OPTIMIZE__ is defined; otherwise the macros below expand
   to the same builtin calls. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bsrli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bslli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}

/* Same builtin call as _mm_bsrli_si128. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

/* Same builtin call as _mm_bslli_si128. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}
#else
#define _mm_bsrli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_bslli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_srli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_slli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif
e829311e 1217
517b0286 1218extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8071f7e8 1219_mm_srli_epi16 (__m128i __A, int __B)
e829311e 1220{
5802c0cb 1221 return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
e829311e 1222}
1223
517b0286 1224extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8071f7e8 1225_mm_srli_epi32 (__m128i __A, int __B)
e829311e 1226{
5802c0cb 1227 return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
e829311e 1228}
1229
517b0286 1230extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
8071f7e8 1231_mm_srli_epi64 (__m128i __A, int __B)
e829311e 1232{
5802c0cb 1233 return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
e829311e 1234}
1235
517b0286 1236extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 1237_mm_sll_epi16 (__m128i __A, __m128i __B)
e829311e 1238{
1a510660 1239 return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
e829311e 1240}
1241
517b0286 1242extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 1243_mm_sll_epi32 (__m128i __A, __m128i __B)
e829311e 1244{
1a510660 1245 return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
e829311e 1246}
1247
517b0286 1248extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 1249_mm_sll_epi64 (__m128i __A, __m128i __B)
e829311e 1250{
1a510660 1251 return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
e829311e 1252}
1253
517b0286 1254extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 1255_mm_sra_epi16 (__m128i __A, __m128i __B)
e829311e 1256{
1a510660 1257 return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
e829311e 1258}
1259
517b0286 1260extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 1261_mm_sra_epi32 (__m128i __A, __m128i __B)
e829311e 1262{
1a510660 1263 return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
e829311e 1264}
e829311e 1265
517b0286 1266extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 1267_mm_srl_epi16 (__m128i __A, __m128i __B)
e829311e 1268{
1a510660 1269 return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
e829311e 1270}
1271
517b0286 1272extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 1273_mm_srl_epi32 (__m128i __A, __m128i __B)
e829311e 1274{
1a510660 1275 return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
e829311e 1276}
1277
517b0286 1278extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
5802c0cb 1279_mm_srl_epi64 (__m128i __A, __m128i __B)
e829311e 1280{
1a510660 1281 return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
e829311e 1282}
1283
517b0286 1284extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1285_mm_and_si128 (__m128i __A, __m128i __B)
1286{
d521a5b2 1287 return (__m128i) ((__v2du)__A & (__v2du)__B);
e829311e 1288}
1289
517b0286 1290extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1291_mm_andnot_si128 (__m128i __A, __m128i __B)
1292{
1293 return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1294}
1295
517b0286 1296extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1297_mm_or_si128 (__m128i __A, __m128i __B)
1298{
d521a5b2 1299 return (__m128i) ((__v2du)__A | (__v2du)__B);
e829311e 1300}
1301
517b0286 1302extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1303_mm_xor_si128 (__m128i __A, __m128i __B)
1304{
d521a5b2 1305 return (__m128i) ((__v2du)__A ^ (__v2du)__B);
e829311e 1306}
1307
517b0286 1308extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1309_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1310{
668cbe0a 1311 return (__m128i) ((__v16qi)__A == (__v16qi)__B);
e829311e 1312}
1313
517b0286 1314extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1315_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1316{
d521a5b2 1317 return (__m128i) ((__v8hi)__A == (__v8hi)__B);
e829311e 1318}
1319
517b0286 1320extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1321_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1322{
d521a5b2 1323 return (__m128i) ((__v4si)__A == (__v4si)__B);
e829311e 1324}
1325
517b0286 1326extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1327_mm_cmplt_epi8 (__m128i __A, __m128i __B)
1328{
fba369f3 1329 return (__m128i) ((__v16qs)__A < (__v16qs)__B);
e829311e 1330}
1331
517b0286 1332extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1333_mm_cmplt_epi16 (__m128i __A, __m128i __B)
1334{
d521a5b2 1335 return (__m128i) ((__v8hi)__A < (__v8hi)__B);
e829311e 1336}
1337
517b0286 1338extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1339_mm_cmplt_epi32 (__m128i __A, __m128i __B)
1340{
d521a5b2 1341 return (__m128i) ((__v4si)__A < (__v4si)__B);
e829311e 1342}
1343
517b0286 1344extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1345_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1346{
fba369f3 1347 return (__m128i) ((__v16qs)__A > (__v16qs)__B);
e829311e 1348}
1349
517b0286 1350extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1351_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1352{
d521a5b2 1353 return (__m128i) ((__v8hi)__A > (__v8hi)__B);
e829311e 1354}
1355
517b0286 1356extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1357_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1358{
d521a5b2 1359 return (__m128i) ((__v4si)__A > (__v4si)__B);
e829311e 1360}
1361
#ifdef __OPTIMIZE__
/* Read element __N of __A (viewed as __v8hi) through
   __builtin_ia32_vec_ext_v8hi. The value is cast to unsigned short
   before being returned as int. */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  __v8hi __lanes = (__v8hi)__A;
  return (unsigned short) __builtin_ia32_vec_ext_v8hi (__lanes, __N);
}

/* Call __builtin_ia32_vec_set_v8hi with __A viewed as __v8hi, the value
   __D and the index __N; the result is returned as __m128i. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  __v8hi __lanes = (__v8hi)__A;
  return (__m128i) __builtin_ia32_vec_set_v8hi (__lanes, __D, __N);
}
#else
/* Macro forms used when __OPTIMIZE__ is not defined; they expand to the
   same builtin calls as the inline functions above. */
#define _mm_extract_epi16(A, N) \
  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_insert_epi16(A, D, N) \
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
					  (int)(D), (int)(N)))
#endif
e829311e 1381
517b0286 1382extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1383_mm_max_epi16 (__m128i __A, __m128i __B)
1384{
1385 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1386}
1387
517b0286 1388extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1389_mm_max_epu8 (__m128i __A, __m128i __B)
1390{
1391 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1392}
1393
517b0286 1394extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1395_mm_min_epi16 (__m128i __A, __m128i __B)
1396{
1397 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1398}
1399
517b0286 1400extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1401_mm_min_epu8 (__m128i __A, __m128i __B)
1402{
1403 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1404}
1405
517b0286 1406extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1407_mm_movemask_epi8 (__m128i __A)
1408{
1409 return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1410}
1411
517b0286 1412extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1413_mm_mulhi_epu16 (__m128i __A, __m128i __B)
1414{
1415 return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
1416}
1417
#ifdef __OPTIMIZE__
/* Shuffle wrappers. Each passes __A (viewed with the element type the
   builtin takes) and the selector __mask to the matching builtin. The
   inline forms exist only when __OPTIMIZE__ is defined; otherwise the
   macros below expand to the same builtin calls. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  __v8hi __lanes = (__v8hi)__A;
  return (__m128i)__builtin_ia32_pshufhw (__lanes, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  __v8hi __lanes = (__v8hi)__A;
  return (__m128i)__builtin_ia32_pshuflw (__lanes, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  __v4si __lanes = (__v4si)__A;
  return (__m128i)__builtin_ia32_pshufd (__lanes, __mask);
}
#else
#define _mm_shufflehi_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shufflelo_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shuffle_epi32(A, N) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
#endif
e829311e 1444
517b0286 1445extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1446_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1447{
1448 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1449}
1450
517b0286 1451extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1452_mm_avg_epu8 (__m128i __A, __m128i __B)
1453{
1454 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1455}
1456
517b0286 1457extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1458_mm_avg_epu16 (__m128i __A, __m128i __B)
1459{
1460 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1461}
1462
517b0286 1463extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1464_mm_sad_epu8 (__m128i __A, __m128i __B)
1465{
1466 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
1467}
1468
/* Call __builtin_ia32_movnti with the destination pointer __A and the
   int value __B. Nothing is returned. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
  __builtin_ia32_movnti (__A, __B);
  return;
}
1474
#ifdef __x86_64__
/* Call __builtin_ia32_movnti64 with the destination pointer __A and the
   long long value __B. Only defined when __x86_64__ is defined. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si64 (long long int *__A, long long int __B)
{
  __builtin_ia32_movnti64 (__A, __B);
  return;
}
#endif
1482
517b0286 1483extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1484_mm_stream_si128 (__m128i *__A, __m128i __B)
1485{
1486 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1487}
1488
517b0286 1489extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1490_mm_stream_pd (double *__A, __m128d __B)
1491{
1492 __builtin_ia32_movntpd (__A, (__v2df)__B);
1493}
1494
/* Call __builtin_ia32_clflush with the address __A. Nothing is
   returned. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
  return;
}
1500
/* Call __builtin_ia32_lfence. Takes no arguments and returns
   nothing. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
  return;
}
1506
/* Call __builtin_ia32_mfence. Takes no arguments and returns
   nothing. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
  return;
}
1512
517b0286 1513extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1514_mm_cvtsi32_si128 (int __A)
1515{
e3807cc7 1516 return _mm_set_epi32 (0, 0, 0, __A);
e829311e 1517}
1518
1519#ifdef __x86_64__
52fdc46e 1520/* Intel intrinsic. */
517b0286 1521extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
52fdc46e 1522_mm_cvtsi64_si128 (long long __A)
1523{
1524 return _mm_set_epi64x (0, __A);
1525}
1526
1527/* Microsoft intrinsic. */
517b0286 1528extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
e829311e 1529_mm_cvtsi64x_si128 (long long __A)
1530{
e3807cc7 1531 return _mm_set_epi64x (0, __A);
e829311e 1532}
1533#endif
1534
af87506b 1535/* Casts between various SP, DP, INT vector types. Note that these do no
1536 conversion of values, they just change the type. */
517b0286 1537extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
af87506b 1538_mm_castpd_ps(__m128d __A)
1539{
1540 return (__m128) __A;
1541}
1542
517b0286 1543extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
af87506b 1544_mm_castpd_si128(__m128d __A)
1545{
1546 return (__m128i) __A;
1547}
1548
517b0286 1549extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
af87506b 1550_mm_castps_pd(__m128 __A)
1551{
1552 return (__m128d) __A;
1553}
1554
517b0286 1555extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
af87506b 1556_mm_castps_si128(__m128 __A)
1557{
1558 return (__m128i) __A;
1559}
1560
517b0286 1561extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
af87506b 1562_mm_castsi128_ps(__m128i __A)
1563{
1564 return (__m128) __A;
1565}
1566
517b0286 1567extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
af87506b 1568_mm_castsi128_pd(__m128i __A)
1569{
1570 return (__m128d) __A;
1571}
1572
ef21d40e 1573#ifdef __DISABLE_SSE2__
1574#undef __DISABLE_SSE2__
1575#pragma GCC pop_options
1576#endif /* __DISABLE_SSE2__ */
e829311e 1577
1578#endif /* _EMMINTRIN_H_INCLUDED */