]>
git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/emmintrin.h
1 /* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
2 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to
18 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
19 Boston, MA 02110-1301, USA. */
21 /* As a special exception, if you include this header file into source
22 files compiled by GCC, this header file does not by itself cause
23 the resulting executable to be covered by the GNU General Public
24 License. This exception does not however invalidate any other
25 reasons why the executable file might be covered by the GNU General
28 /* Implemented from the specification included in the Intel C++ Compiler
29 User Guide and Reference, version 9.0. */
31 #ifndef _EMMINTRIN_H_INCLUDED
32 #define _EMMINTRIN_H_INCLUDED
/* Refuse to provide the SSE2 intrinsics unless the compiler has SSE2
   code generation enabled (e.g. -msse2).  */
#ifndef __SSE2__
# error "SSE2 instruction set not enabled"
#endif
38 /* We need definitions from the SSE header files*/
39 #include <xmmintrin.h>
/* Internal 16-byte vector types, one per element width; used to cast
   operands to the element type each builtin expects.  */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__ ((__vector_size__ (16)));
typedef char __v16qi __attribute__ ((__vector_size__ (16)));
/* Public 128-bit vector types.  The Intel API is flexible enough that we
   must allow aliasing with other vector types, and their scalar
   components, hence __may_alias__.  */
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
/* Create a selector for use with the SHUFPD instruction: FP1 lands in
   bit 1 and FP0 in bit 0 of the resulting mask.  */
#define _MM_SHUFFLE2(fp1,fp0) \
 (((fp1) << 1) | (fp0))
57 /* Create a vector with element 0 as F and the rest zero. */
58 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
59 _mm_set_sd (double __F
)
61 return __extension__ (__m128d
){ __F
, 0.0 };
64 /* Create a vector with both elements equal to F. */
65 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
66 _mm_set1_pd (double __F
)
68 return __extension__ (__m128d
){ __F
, __F
};
71 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
72 _mm_set_pd1 (double __F
)
74 return _mm_set1_pd (__F
);
77 /* Create a vector with the lower value X and upper value W. */
78 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
79 _mm_set_pd (double __W
, double __X
)
81 return __extension__ (__m128d
){ __X
, __W
};
84 /* Create a vector with the lower value W and upper value X. */
85 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
86 _mm_setr_pd (double __W
, double __X
)
88 return __extension__ (__m128d
){ __W
, __X
};
91 /* Create a vector of zeros. */
92 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
95 return __extension__ (__m128d
){ 0.0, 0.0 };
98 /* Sets the low DPFP value of A from the low value of B. */
99 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
100 _mm_move_sd (__m128d __A
, __m128d __B
)
102 return (__m128d
) __builtin_ia32_movsd ((__v2df
)__A
, (__v2df
)__B
);
105 /* Load two DPFP values from P. The address must be 16-byte aligned. */
106 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
107 _mm_load_pd (double const *__P
)
109 return *(__m128d
*)__P
;
112 /* Load two DPFP values from P. The address need not be 16-byte aligned. */
113 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
114 _mm_loadu_pd (double const *__P
)
116 return __builtin_ia32_loadupd (__P
);
119 /* Create a vector with all two elements equal to *P. */
120 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
121 _mm_load1_pd (double const *__P
)
123 return _mm_set1_pd (*__P
);
126 /* Create a vector with element 0 as *P and the rest zero. */
127 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
128 _mm_load_sd (double const *__P
)
130 return _mm_set_sd (*__P
);
133 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
134 _mm_load_pd1 (double const *__P
)
136 return _mm_load1_pd (__P
);
139 /* Load two DPFP values in reverse order. The address must be aligned. */
140 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
141 _mm_loadr_pd (double const *__P
)
143 __m128d __tmp
= _mm_load_pd (__P
);
144 return __builtin_ia32_shufpd (__tmp
, __tmp
, _MM_SHUFFLE2 (0,1));
147 /* Store two DPFP values. The address must be 16-byte aligned. */
148 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
149 _mm_store_pd (double *__P
, __m128d __A
)
151 *(__m128d
*)__P
= __A
;
154 /* Store two DPFP values. The address need not be 16-byte aligned. */
155 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
156 _mm_storeu_pd (double *__P
, __m128d __A
)
158 __builtin_ia32_storeupd (__P
, __A
);
161 /* Stores the lower DPFP value. */
162 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
163 _mm_store_sd (double *__P
, __m128d __A
)
165 *__P
= __builtin_ia32_vec_ext_v2df (__A
, 0);
168 extern __inline
double __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
169 _mm_cvtsd_f64 (__m128d __A
)
171 return __builtin_ia32_vec_ext_v2df (__A
, 0);
174 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
175 _mm_storel_pd (double *__P
, __m128d __A
)
177 _mm_store_sd (__P
, __A
);
180 /* Stores the upper DPFP value. */
181 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
182 _mm_storeh_pd (double *__P
, __m128d __A
)
184 *__P
= __builtin_ia32_vec_ext_v2df (__A
, 1);
187 /* Store the lower DPFP value across two words.
188 The address must be 16-byte aligned. */
189 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
190 _mm_store1_pd (double *__P
, __m128d __A
)
192 _mm_store_pd (__P
, __builtin_ia32_shufpd (__A
, __A
, _MM_SHUFFLE2 (0,0)));
195 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
196 _mm_store_pd1 (double *__P
, __m128d __A
)
198 _mm_store1_pd (__P
, __A
);
201 /* Store two DPFP values in reverse order. The address must be aligned. */
202 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
203 _mm_storer_pd (double *__P
, __m128d __A
)
205 _mm_store_pd (__P
, __builtin_ia32_shufpd (__A
, __A
, _MM_SHUFFLE2 (0,1)));
208 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
209 _mm_cvtsi128_si32 (__m128i __A
)
211 return __builtin_ia32_vec_ext_v4si ((__v4si
)__A
, 0);
215 /* Intel intrinsic. */
216 extern __inline
long long __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
217 _mm_cvtsi128_si64 (__m128i __A
)
219 return __builtin_ia32_vec_ext_v2di ((__v2di
)__A
, 0);
222 /* Microsoft intrinsic. */
223 extern __inline
long long __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
224 _mm_cvtsi128_si64x (__m128i __A
)
226 return __builtin_ia32_vec_ext_v2di ((__v2di
)__A
, 0);
230 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
231 _mm_add_pd (__m128d __A
, __m128d __B
)
233 return (__m128d
)__builtin_ia32_addpd ((__v2df
)__A
, (__v2df
)__B
);
236 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
237 _mm_add_sd (__m128d __A
, __m128d __B
)
239 return (__m128d
)__builtin_ia32_addsd ((__v2df
)__A
, (__v2df
)__B
);
242 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
243 _mm_sub_pd (__m128d __A
, __m128d __B
)
245 return (__m128d
)__builtin_ia32_subpd ((__v2df
)__A
, (__v2df
)__B
);
248 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
249 _mm_sub_sd (__m128d __A
, __m128d __B
)
251 return (__m128d
)__builtin_ia32_subsd ((__v2df
)__A
, (__v2df
)__B
);
254 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
255 _mm_mul_pd (__m128d __A
, __m128d __B
)
257 return (__m128d
)__builtin_ia32_mulpd ((__v2df
)__A
, (__v2df
)__B
);
260 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
261 _mm_mul_sd (__m128d __A
, __m128d __B
)
263 return (__m128d
)__builtin_ia32_mulsd ((__v2df
)__A
, (__v2df
)__B
);
266 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
267 _mm_div_pd (__m128d __A
, __m128d __B
)
269 return (__m128d
)__builtin_ia32_divpd ((__v2df
)__A
, (__v2df
)__B
);
272 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
273 _mm_div_sd (__m128d __A
, __m128d __B
)
275 return (__m128d
)__builtin_ia32_divsd ((__v2df
)__A
, (__v2df
)__B
);
278 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
279 _mm_sqrt_pd (__m128d __A
)
281 return (__m128d
)__builtin_ia32_sqrtpd ((__v2df
)__A
);
284 /* Return pair {sqrt (A[0), B[1]}. */
285 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
286 _mm_sqrt_sd (__m128d __A
, __m128d __B
)
288 __v2df __tmp
= __builtin_ia32_movsd ((__v2df
)__A
, (__v2df
)__B
);
289 return (__m128d
)__builtin_ia32_sqrtsd ((__v2df
)__tmp
);
292 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
293 _mm_min_pd (__m128d __A
, __m128d __B
)
295 return (__m128d
)__builtin_ia32_minpd ((__v2df
)__A
, (__v2df
)__B
);
298 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
299 _mm_min_sd (__m128d __A
, __m128d __B
)
301 return (__m128d
)__builtin_ia32_minsd ((__v2df
)__A
, (__v2df
)__B
);
304 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
305 _mm_max_pd (__m128d __A
, __m128d __B
)
307 return (__m128d
)__builtin_ia32_maxpd ((__v2df
)__A
, (__v2df
)__B
);
310 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
311 _mm_max_sd (__m128d __A
, __m128d __B
)
313 return (__m128d
)__builtin_ia32_maxsd ((__v2df
)__A
, (__v2df
)__B
);
316 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
317 _mm_and_pd (__m128d __A
, __m128d __B
)
319 return (__m128d
)__builtin_ia32_andpd ((__v2df
)__A
, (__v2df
)__B
);
322 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
323 _mm_andnot_pd (__m128d __A
, __m128d __B
)
325 return (__m128d
)__builtin_ia32_andnpd ((__v2df
)__A
, (__v2df
)__B
);
328 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
329 _mm_or_pd (__m128d __A
, __m128d __B
)
331 return (__m128d
)__builtin_ia32_orpd ((__v2df
)__A
, (__v2df
)__B
);
334 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
335 _mm_xor_pd (__m128d __A
, __m128d __B
)
337 return (__m128d
)__builtin_ia32_xorpd ((__v2df
)__A
, (__v2df
)__B
);
340 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
341 _mm_cmpeq_pd (__m128d __A
, __m128d __B
)
343 return (__m128d
)__builtin_ia32_cmpeqpd ((__v2df
)__A
, (__v2df
)__B
);
346 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
347 _mm_cmplt_pd (__m128d __A
, __m128d __B
)
349 return (__m128d
)__builtin_ia32_cmpltpd ((__v2df
)__A
, (__v2df
)__B
);
352 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
353 _mm_cmple_pd (__m128d __A
, __m128d __B
)
355 return (__m128d
)__builtin_ia32_cmplepd ((__v2df
)__A
, (__v2df
)__B
);
358 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
359 _mm_cmpgt_pd (__m128d __A
, __m128d __B
)
361 return (__m128d
)__builtin_ia32_cmpgtpd ((__v2df
)__A
, (__v2df
)__B
);
364 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
365 _mm_cmpge_pd (__m128d __A
, __m128d __B
)
367 return (__m128d
)__builtin_ia32_cmpgepd ((__v2df
)__A
, (__v2df
)__B
);
370 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
371 _mm_cmpneq_pd (__m128d __A
, __m128d __B
)
373 return (__m128d
)__builtin_ia32_cmpneqpd ((__v2df
)__A
, (__v2df
)__B
);
376 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
377 _mm_cmpnlt_pd (__m128d __A
, __m128d __B
)
379 return (__m128d
)__builtin_ia32_cmpnltpd ((__v2df
)__A
, (__v2df
)__B
);
382 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
383 _mm_cmpnle_pd (__m128d __A
, __m128d __B
)
385 return (__m128d
)__builtin_ia32_cmpnlepd ((__v2df
)__A
, (__v2df
)__B
);
388 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
389 _mm_cmpngt_pd (__m128d __A
, __m128d __B
)
391 return (__m128d
)__builtin_ia32_cmpngtpd ((__v2df
)__A
, (__v2df
)__B
);
394 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
395 _mm_cmpnge_pd (__m128d __A
, __m128d __B
)
397 return (__m128d
)__builtin_ia32_cmpngepd ((__v2df
)__A
, (__v2df
)__B
);
400 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
401 _mm_cmpord_pd (__m128d __A
, __m128d __B
)
403 return (__m128d
)__builtin_ia32_cmpordpd ((__v2df
)__A
, (__v2df
)__B
);
406 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
407 _mm_cmpunord_pd (__m128d __A
, __m128d __B
)
409 return (__m128d
)__builtin_ia32_cmpunordpd ((__v2df
)__A
, (__v2df
)__B
);
412 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
413 _mm_cmpeq_sd (__m128d __A
, __m128d __B
)
415 return (__m128d
)__builtin_ia32_cmpeqsd ((__v2df
)__A
, (__v2df
)__B
);
418 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
419 _mm_cmplt_sd (__m128d __A
, __m128d __B
)
421 return (__m128d
)__builtin_ia32_cmpltsd ((__v2df
)__A
, (__v2df
)__B
);
424 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
425 _mm_cmple_sd (__m128d __A
, __m128d __B
)
427 return (__m128d
)__builtin_ia32_cmplesd ((__v2df
)__A
, (__v2df
)__B
);
430 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
431 _mm_cmpgt_sd (__m128d __A
, __m128d __B
)
433 return (__m128d
) __builtin_ia32_movsd ((__v2df
) __A
,
435 __builtin_ia32_cmpltsd ((__v2df
) __B
,
440 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
441 _mm_cmpge_sd (__m128d __A
, __m128d __B
)
443 return (__m128d
) __builtin_ia32_movsd ((__v2df
) __A
,
445 __builtin_ia32_cmplesd ((__v2df
) __B
,
450 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
451 _mm_cmpneq_sd (__m128d __A
, __m128d __B
)
453 return (__m128d
)__builtin_ia32_cmpneqsd ((__v2df
)__A
, (__v2df
)__B
);
456 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
457 _mm_cmpnlt_sd (__m128d __A
, __m128d __B
)
459 return (__m128d
)__builtin_ia32_cmpnltsd ((__v2df
)__A
, (__v2df
)__B
);
462 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
463 _mm_cmpnle_sd (__m128d __A
, __m128d __B
)
465 return (__m128d
)__builtin_ia32_cmpnlesd ((__v2df
)__A
, (__v2df
)__B
);
468 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
469 _mm_cmpngt_sd (__m128d __A
, __m128d __B
)
471 return (__m128d
) __builtin_ia32_movsd ((__v2df
) __A
,
473 __builtin_ia32_cmpnltsd ((__v2df
) __B
,
478 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
479 _mm_cmpnge_sd (__m128d __A
, __m128d __B
)
481 return (__m128d
) __builtin_ia32_movsd ((__v2df
) __A
,
483 __builtin_ia32_cmpnlesd ((__v2df
) __B
,
488 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
489 _mm_cmpord_sd (__m128d __A
, __m128d __B
)
491 return (__m128d
)__builtin_ia32_cmpordsd ((__v2df
)__A
, (__v2df
)__B
);
494 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
495 _mm_cmpunord_sd (__m128d __A
, __m128d __B
)
497 return (__m128d
)__builtin_ia32_cmpunordsd ((__v2df
)__A
, (__v2df
)__B
);
500 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
501 _mm_comieq_sd (__m128d __A
, __m128d __B
)
503 return __builtin_ia32_comisdeq ((__v2df
)__A
, (__v2df
)__B
);
506 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
507 _mm_comilt_sd (__m128d __A
, __m128d __B
)
509 return __builtin_ia32_comisdlt ((__v2df
)__A
, (__v2df
)__B
);
512 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
513 _mm_comile_sd (__m128d __A
, __m128d __B
)
515 return __builtin_ia32_comisdle ((__v2df
)__A
, (__v2df
)__B
);
518 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
519 _mm_comigt_sd (__m128d __A
, __m128d __B
)
521 return __builtin_ia32_comisdgt ((__v2df
)__A
, (__v2df
)__B
);
524 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
525 _mm_comige_sd (__m128d __A
, __m128d __B
)
527 return __builtin_ia32_comisdge ((__v2df
)__A
, (__v2df
)__B
);
530 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
531 _mm_comineq_sd (__m128d __A
, __m128d __B
)
533 return __builtin_ia32_comisdneq ((__v2df
)__A
, (__v2df
)__B
);
536 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
537 _mm_ucomieq_sd (__m128d __A
, __m128d __B
)
539 return __builtin_ia32_ucomisdeq ((__v2df
)__A
, (__v2df
)__B
);
542 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
543 _mm_ucomilt_sd (__m128d __A
, __m128d __B
)
545 return __builtin_ia32_ucomisdlt ((__v2df
)__A
, (__v2df
)__B
);
548 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
549 _mm_ucomile_sd (__m128d __A
, __m128d __B
)
551 return __builtin_ia32_ucomisdle ((__v2df
)__A
, (__v2df
)__B
);
554 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
555 _mm_ucomigt_sd (__m128d __A
, __m128d __B
)
557 return __builtin_ia32_ucomisdgt ((__v2df
)__A
, (__v2df
)__B
);
560 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
561 _mm_ucomige_sd (__m128d __A
, __m128d __B
)
563 return __builtin_ia32_ucomisdge ((__v2df
)__A
, (__v2df
)__B
);
566 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
567 _mm_ucomineq_sd (__m128d __A
, __m128d __B
)
569 return __builtin_ia32_ucomisdneq ((__v2df
)__A
, (__v2df
)__B
);
572 /* Create a vector of Qi, where i is the element number. */
574 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
575 _mm_set_epi64x (long long __q1
, long long __q0
)
577 return __extension__ (__m128i
)(__v2di
){ __q0
, __q1
};
580 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
581 _mm_set_epi64 (__m64 __q1
, __m64 __q0
)
583 return _mm_set_epi64x ((long long)__q1
, (long long)__q0
);
586 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
587 _mm_set_epi32 (int __q3
, int __q2
, int __q1
, int __q0
)
589 return __extension__ (__m128i
)(__v4si
){ __q0
, __q1
, __q2
, __q3
};
592 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
593 _mm_set_epi16 (short __q7
, short __q6
, short __q5
, short __q4
,
594 short __q3
, short __q2
, short __q1
, short __q0
)
596 return __extension__ (__m128i
)(__v8hi
){
597 __q0
, __q1
, __q2
, __q3
, __q4
, __q5
, __q6
, __q7
};
600 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
601 _mm_set_epi8 (char __q15
, char __q14
, char __q13
, char __q12
,
602 char __q11
, char __q10
, char __q09
, char __q08
,
603 char __q07
, char __q06
, char __q05
, char __q04
,
604 char __q03
, char __q02
, char __q01
, char __q00
)
606 return __extension__ (__m128i
)(__v16qi
){
607 __q00
, __q01
, __q02
, __q03
, __q04
, __q05
, __q06
, __q07
,
608 __q08
, __q09
, __q10
, __q11
, __q12
, __q13
, __q14
, __q15
612 /* Set all of the elements of the vector to A. */
614 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
615 _mm_set1_epi64x (long long __A
)
617 return _mm_set_epi64x (__A
, __A
);
620 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
621 _mm_set1_epi64 (__m64 __A
)
623 return _mm_set_epi64 (__A
, __A
);
626 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
627 _mm_set1_epi32 (int __A
)
629 return _mm_set_epi32 (__A
, __A
, __A
, __A
);
632 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
633 _mm_set1_epi16 (short __A
)
635 return _mm_set_epi16 (__A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
);
638 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
639 _mm_set1_epi8 (char __A
)
641 return _mm_set_epi8 (__A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
,
642 __A
, __A
, __A
, __A
, __A
, __A
, __A
, __A
);
645 /* Create a vector of Qi, where i is the element number.
646 The parameter order is reversed from the _mm_set_epi* functions. */
648 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
649 _mm_setr_epi64 (__m64 __q0
, __m64 __q1
)
651 return _mm_set_epi64 (__q1
, __q0
);
654 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
655 _mm_setr_epi32 (int __q0
, int __q1
, int __q2
, int __q3
)
657 return _mm_set_epi32 (__q3
, __q2
, __q1
, __q0
);
660 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
661 _mm_setr_epi16 (short __q0
, short __q1
, short __q2
, short __q3
,
662 short __q4
, short __q5
, short __q6
, short __q7
)
664 return _mm_set_epi16 (__q7
, __q6
, __q5
, __q4
, __q3
, __q2
, __q1
, __q0
);
667 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
668 _mm_setr_epi8 (char __q00
, char __q01
, char __q02
, char __q03
,
669 char __q04
, char __q05
, char __q06
, char __q07
,
670 char __q08
, char __q09
, char __q10
, char __q11
,
671 char __q12
, char __q13
, char __q14
, char __q15
)
673 return _mm_set_epi8 (__q15
, __q14
, __q13
, __q12
, __q11
, __q10
, __q09
, __q08
,
674 __q07
, __q06
, __q05
, __q04
, __q03
, __q02
, __q01
, __q00
);
677 /* Create a vector with element 0 as *P and the rest zero. */
679 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
680 _mm_load_si128 (__m128i
const *__P
)
685 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
686 _mm_loadu_si128 (__m128i
const *__P
)
688 return (__m128i
) __builtin_ia32_loaddqu ((char const *)__P
);
691 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
692 _mm_loadl_epi64 (__m128i
const *__P
)
694 return _mm_set_epi64 ((__m64
)0LL, *(__m64
*)__P
);
697 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
698 _mm_store_si128 (__m128i
*__P
, __m128i __B
)
703 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
704 _mm_storeu_si128 (__m128i
*__P
, __m128i __B
)
706 __builtin_ia32_storedqu ((char *)__P
, (__v16qi
)__B
);
709 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
710 _mm_storel_epi64 (__m128i
*__P
, __m128i __B
)
712 *(long long *)__P
= __builtin_ia32_vec_ext_v2di ((__v2di
)__B
, 0);
715 extern __inline __m64
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
716 _mm_movepi64_pi64 (__m128i __B
)
718 return (__m64
) __builtin_ia32_vec_ext_v2di ((__v2di
)__B
, 0);
721 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
722 _mm_movpi64_epi64 (__m64 __A
)
724 return _mm_set_epi64 ((__m64
)0LL, __A
);
727 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
728 _mm_move_epi64 (__m128i __A
)
730 return (__m128i
)__builtin_ia32_movq128 ((__v2di
) __A
);
733 /* Create a vector of zeros. */
734 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
735 _mm_setzero_si128 (void)
737 return __extension__ (__m128i
)(__v4si
){ 0, 0, 0, 0 };
740 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
741 _mm_cvtepi32_pd (__m128i __A
)
743 return (__m128d
)__builtin_ia32_cvtdq2pd ((__v4si
) __A
);
746 extern __inline __m128
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
747 _mm_cvtepi32_ps (__m128i __A
)
749 return (__m128
)__builtin_ia32_cvtdq2ps ((__v4si
) __A
);
752 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
753 _mm_cvtpd_epi32 (__m128d __A
)
755 return (__m128i
)__builtin_ia32_cvtpd2dq ((__v2df
) __A
);
758 extern __inline __m64
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
759 _mm_cvtpd_pi32 (__m128d __A
)
761 return (__m64
)__builtin_ia32_cvtpd2pi ((__v2df
) __A
);
764 extern __inline __m128
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
765 _mm_cvtpd_ps (__m128d __A
)
767 return (__m128
)__builtin_ia32_cvtpd2ps ((__v2df
) __A
);
770 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
771 _mm_cvttpd_epi32 (__m128d __A
)
773 return (__m128i
)__builtin_ia32_cvttpd2dq ((__v2df
) __A
);
776 extern __inline __m64
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
777 _mm_cvttpd_pi32 (__m128d __A
)
779 return (__m64
)__builtin_ia32_cvttpd2pi ((__v2df
) __A
);
782 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
783 _mm_cvtpi32_pd (__m64 __A
)
785 return (__m128d
)__builtin_ia32_cvtpi2pd ((__v2si
) __A
);
788 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
789 _mm_cvtps_epi32 (__m128 __A
)
791 return (__m128i
)__builtin_ia32_cvtps2dq ((__v4sf
) __A
);
794 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
795 _mm_cvttps_epi32 (__m128 __A
)
797 return (__m128i
)__builtin_ia32_cvttps2dq ((__v4sf
) __A
);
800 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
801 _mm_cvtps_pd (__m128 __A
)
803 return (__m128d
)__builtin_ia32_cvtps2pd ((__v4sf
) __A
);
806 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
807 _mm_cvtsd_si32 (__m128d __A
)
809 return __builtin_ia32_cvtsd2si ((__v2df
) __A
);
813 /* Intel intrinsic. */
814 extern __inline
long long __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
815 _mm_cvtsd_si64 (__m128d __A
)
817 return __builtin_ia32_cvtsd2si64 ((__v2df
) __A
);
820 /* Microsoft intrinsic. */
821 extern __inline
long long __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
822 _mm_cvtsd_si64x (__m128d __A
)
824 return __builtin_ia32_cvtsd2si64 ((__v2df
) __A
);
828 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
829 _mm_cvttsd_si32 (__m128d __A
)
831 return __builtin_ia32_cvttsd2si ((__v2df
) __A
);
835 /* Intel intrinsic. */
836 extern __inline
long long __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
837 _mm_cvttsd_si64 (__m128d __A
)
839 return __builtin_ia32_cvttsd2si64 ((__v2df
) __A
);
842 /* Microsoft intrinsic. */
843 extern __inline
long long __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
844 _mm_cvttsd_si64x (__m128d __A
)
846 return __builtin_ia32_cvttsd2si64 ((__v2df
) __A
);
850 extern __inline __m128
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
851 _mm_cvtsd_ss (__m128 __A
, __m128d __B
)
853 return (__m128
)__builtin_ia32_cvtsd2ss ((__v4sf
) __A
, (__v2df
) __B
);
856 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
857 _mm_cvtsi32_sd (__m128d __A
, int __B
)
859 return (__m128d
)__builtin_ia32_cvtsi2sd ((__v2df
) __A
, __B
);
863 /* Intel intrinsic. */
864 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
865 _mm_cvtsi64_sd (__m128d __A
, long long __B
)
867 return (__m128d
)__builtin_ia32_cvtsi642sd ((__v2df
) __A
, __B
);
870 /* Microsoft intrinsic. */
871 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
872 _mm_cvtsi64x_sd (__m128d __A
, long long __B
)
874 return (__m128d
)__builtin_ia32_cvtsi642sd ((__v2df
) __A
, __B
);
878 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
879 _mm_cvtss_sd (__m128d __A
, __m128 __B
)
881 return (__m128d
)__builtin_ia32_cvtss2sd ((__v2df
) __A
, (__v4sf
)__B
);
/* Shuffle the DPFP elements of A and B under the selector MASK (see
   _MM_SHUFFLE2).  The inline form is used only when optimizing;
   otherwise a macro passes the selector to the builtin directly.
   NOTE(review): presumably because the builtin needs a compile-time
   constant selector -- confirm against the GCC builtin docs.  */
#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
}
#else
#define _mm_shuffle_pd(A, B, N)						\
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),		\
				   (__v2df)(__m128d)(B), (int)(N)))
#endif
896 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
897 _mm_unpackhi_pd (__m128d __A
, __m128d __B
)
899 return (__m128d
)__builtin_ia32_unpckhpd ((__v2df
)__A
, (__v2df
)__B
);
902 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
903 _mm_unpacklo_pd (__m128d __A
, __m128d __B
)
905 return (__m128d
)__builtin_ia32_unpcklpd ((__v2df
)__A
, (__v2df
)__B
);
908 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
909 _mm_loadh_pd (__m128d __A
, double const *__B
)
911 return (__m128d
)__builtin_ia32_loadhpd ((__v2df
)__A
, __B
);
914 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
915 _mm_loadl_pd (__m128d __A
, double const *__B
)
917 return (__m128d
)__builtin_ia32_loadlpd ((__v2df
)__A
, __B
);
920 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
921 _mm_movemask_pd (__m128d __A
)
923 return __builtin_ia32_movmskpd ((__v2df
)__A
);
926 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
927 _mm_packs_epi16 (__m128i __A
, __m128i __B
)
929 return (__m128i
)__builtin_ia32_packsswb128 ((__v8hi
)__A
, (__v8hi
)__B
);
932 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
933 _mm_packs_epi32 (__m128i __A
, __m128i __B
)
935 return (__m128i
)__builtin_ia32_packssdw128 ((__v4si
)__A
, (__v4si
)__B
);
938 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
939 _mm_packus_epi16 (__m128i __A
, __m128i __B
)
941 return (__m128i
)__builtin_ia32_packuswb128 ((__v8hi
)__A
, (__v8hi
)__B
);
944 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
945 _mm_unpackhi_epi8 (__m128i __A
, __m128i __B
)
947 return (__m128i
)__builtin_ia32_punpckhbw128 ((__v16qi
)__A
, (__v16qi
)__B
);
950 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
951 _mm_unpackhi_epi16 (__m128i __A
, __m128i __B
)
953 return (__m128i
)__builtin_ia32_punpckhwd128 ((__v8hi
)__A
, (__v8hi
)__B
);
956 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
957 _mm_unpackhi_epi32 (__m128i __A
, __m128i __B
)
959 return (__m128i
)__builtin_ia32_punpckhdq128 ((__v4si
)__A
, (__v4si
)__B
);
962 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
963 _mm_unpackhi_epi64 (__m128i __A
, __m128i __B
)
965 return (__m128i
)__builtin_ia32_punpckhqdq128 ((__v2di
)__A
, (__v2di
)__B
);
968 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
969 _mm_unpacklo_epi8 (__m128i __A
, __m128i __B
)
971 return (__m128i
)__builtin_ia32_punpcklbw128 ((__v16qi
)__A
, (__v16qi
)__B
);
974 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
975 _mm_unpacklo_epi16 (__m128i __A
, __m128i __B
)
977 return (__m128i
)__builtin_ia32_punpcklwd128 ((__v8hi
)__A
, (__v8hi
)__B
);
980 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
981 _mm_unpacklo_epi32 (__m128i __A
, __m128i __B
)
983 return (__m128i
)__builtin_ia32_punpckldq128 ((__v4si
)__A
, (__v4si
)__B
);
986 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
987 _mm_unpacklo_epi64 (__m128i __A
, __m128i __B
)
989 return (__m128i
)__builtin_ia32_punpcklqdq128 ((__v2di
)__A
, (__v2di
)__B
);
992 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
993 _mm_add_epi8 (__m128i __A
, __m128i __B
)
995 return (__m128i
)__builtin_ia32_paddb128 ((__v16qi
)__A
, (__v16qi
)__B
);
998 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
999 _mm_add_epi16 (__m128i __A
, __m128i __B
)
1001 return (__m128i
)__builtin_ia32_paddw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1004 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1005 _mm_add_epi32 (__m128i __A
, __m128i __B
)
1007 return (__m128i
)__builtin_ia32_paddd128 ((__v4si
)__A
, (__v4si
)__B
);
1010 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1011 _mm_add_epi64 (__m128i __A
, __m128i __B
)
1013 return (__m128i
)__builtin_ia32_paddq128 ((__v2di
)__A
, (__v2di
)__B
);
1016 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1017 _mm_adds_epi8 (__m128i __A
, __m128i __B
)
1019 return (__m128i
)__builtin_ia32_paddsb128 ((__v16qi
)__A
, (__v16qi
)__B
);
1022 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1023 _mm_adds_epi16 (__m128i __A
, __m128i __B
)
1025 return (__m128i
)__builtin_ia32_paddsw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1028 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1029 _mm_adds_epu8 (__m128i __A
, __m128i __B
)
1031 return (__m128i
)__builtin_ia32_paddusb128 ((__v16qi
)__A
, (__v16qi
)__B
);
1034 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1035 _mm_adds_epu16 (__m128i __A
, __m128i __B
)
1037 return (__m128i
)__builtin_ia32_paddusw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1040 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1041 _mm_sub_epi8 (__m128i __A
, __m128i __B
)
1043 return (__m128i
)__builtin_ia32_psubb128 ((__v16qi
)__A
, (__v16qi
)__B
);
1046 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1047 _mm_sub_epi16 (__m128i __A
, __m128i __B
)
1049 return (__m128i
)__builtin_ia32_psubw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1052 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1053 _mm_sub_epi32 (__m128i __A
, __m128i __B
)
1055 return (__m128i
)__builtin_ia32_psubd128 ((__v4si
)__A
, (__v4si
)__B
);
1058 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1059 _mm_sub_epi64 (__m128i __A
, __m128i __B
)
1061 return (__m128i
)__builtin_ia32_psubq128 ((__v2di
)__A
, (__v2di
)__B
);
1064 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1065 _mm_subs_epi8 (__m128i __A
, __m128i __B
)
1067 return (__m128i
)__builtin_ia32_psubsb128 ((__v16qi
)__A
, (__v16qi
)__B
);
1070 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1071 _mm_subs_epi16 (__m128i __A
, __m128i __B
)
1073 return (__m128i
)__builtin_ia32_psubsw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1076 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1077 _mm_subs_epu8 (__m128i __A
, __m128i __B
)
1079 return (__m128i
)__builtin_ia32_psubusb128 ((__v16qi
)__A
, (__v16qi
)__B
);
1082 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1083 _mm_subs_epu16 (__m128i __A
, __m128i __B
)
1085 return (__m128i
)__builtin_ia32_psubusw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1088 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1089 _mm_madd_epi16 (__m128i __A
, __m128i __B
)
1091 return (__m128i
)__builtin_ia32_pmaddwd128 ((__v8hi
)__A
, (__v8hi
)__B
);
1094 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1095 _mm_mulhi_epi16 (__m128i __A
, __m128i __B
)
1097 return (__m128i
)__builtin_ia32_pmulhw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1100 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1101 _mm_mullo_epi16 (__m128i __A
, __m128i __B
)
1103 return (__m128i
)__builtin_ia32_pmullw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1106 extern __inline __m64
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1107 _mm_mul_su32 (__m64 __A
, __m64 __B
)
1109 return (__m64
)__builtin_ia32_pmuludq ((__v2si
)__A
, (__v2si
)__B
);
1112 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1113 _mm_mul_epu32 (__m128i __A
, __m128i __B
)
1115 return (__m128i
)__builtin_ia32_pmuludq128 ((__v4si
)__A
, (__v4si
)__B
);
1118 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1119 _mm_slli_epi16 (__m128i __A
, int __B
)
1121 return (__m128i
)__builtin_ia32_psllwi128 ((__v8hi
)__A
, __B
);
1124 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1125 _mm_slli_epi32 (__m128i __A
, int __B
)
1127 return (__m128i
)__builtin_ia32_pslldi128 ((__v4si
)__A
, __B
);
1130 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1131 _mm_slli_epi64 (__m128i __A
, int __B
)
1133 return (__m128i
)__builtin_ia32_psllqi128 ((__v2di
)__A
, __B
);
1136 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1137 _mm_srai_epi16 (__m128i __A
, int __B
)
1139 return (__m128i
)__builtin_ia32_psrawi128 ((__v8hi
)__A
, __B
);
1142 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1143 _mm_srai_epi32 (__m128i __A
, int __B
)
1145 return (__m128i
)__builtin_ia32_psradi128 ((__v4si
)__A
, __B
);
/* Whole-register shifts.  In both directions the count is multiplied by 8
   before it is handed to the builtin (presumably a byte count converted
   to the bit count the builtin expects -- confirm).  An inline function
   is used when __OPTIMIZE__ is defined and a macro otherwise (presumably
   so the count still reaches the builtin as a literal constant when
   nothing is being inlined -- confirm).  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}
#else
#define _mm_srli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_slli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif
1167 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1168 _mm_srli_epi16 (__m128i __A
, int __B
)
1170 return (__m128i
)__builtin_ia32_psrlwi128 ((__v8hi
)__A
, __B
);
1173 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1174 _mm_srli_epi32 (__m128i __A
, int __B
)
1176 return (__m128i
)__builtin_ia32_psrldi128 ((__v4si
)__A
, __B
);
1179 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1180 _mm_srli_epi64 (__m128i __A
, int __B
)
1182 return (__m128i
)__builtin_ia32_psrlqi128 ((__v2di
)__A
, __B
);
1185 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1186 _mm_sll_epi16 (__m128i __A
, __m128i __B
)
1188 return (__m128i
)__builtin_ia32_psllw128((__v8hi
)__A
, (__v8hi
)__B
);
1191 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1192 _mm_sll_epi32 (__m128i __A
, __m128i __B
)
1194 return (__m128i
)__builtin_ia32_pslld128((__v4si
)__A
, (__v4si
)__B
);
1197 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1198 _mm_sll_epi64 (__m128i __A
, __m128i __B
)
1200 return (__m128i
)__builtin_ia32_psllq128((__v2di
)__A
, (__v2di
)__B
);
1203 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1204 _mm_sra_epi16 (__m128i __A
, __m128i __B
)
1206 return (__m128i
)__builtin_ia32_psraw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1209 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1210 _mm_sra_epi32 (__m128i __A
, __m128i __B
)
1212 return (__m128i
)__builtin_ia32_psrad128 ((__v4si
)__A
, (__v4si
)__B
);
1215 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1216 _mm_srl_epi16 (__m128i __A
, __m128i __B
)
1218 return (__m128i
)__builtin_ia32_psrlw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1221 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1222 _mm_srl_epi32 (__m128i __A
, __m128i __B
)
1224 return (__m128i
)__builtin_ia32_psrld128 ((__v4si
)__A
, (__v4si
)__B
);
1227 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1228 _mm_srl_epi64 (__m128i __A
, __m128i __B
)
1230 return (__m128i
)__builtin_ia32_psrlq128 ((__v2di
)__A
, (__v2di
)__B
);
1233 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1234 _mm_and_si128 (__m128i __A
, __m128i __B
)
1236 return (__m128i
)__builtin_ia32_pand128 ((__v2di
)__A
, (__v2di
)__B
);
1239 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1240 _mm_andnot_si128 (__m128i __A
, __m128i __B
)
1242 return (__m128i
)__builtin_ia32_pandn128 ((__v2di
)__A
, (__v2di
)__B
);
1245 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1246 _mm_or_si128 (__m128i __A
, __m128i __B
)
1248 return (__m128i
)__builtin_ia32_por128 ((__v2di
)__A
, (__v2di
)__B
);
1251 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1252 _mm_xor_si128 (__m128i __A
, __m128i __B
)
1254 return (__m128i
)__builtin_ia32_pxor128 ((__v2di
)__A
, (__v2di
)__B
);
1257 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1258 _mm_cmpeq_epi8 (__m128i __A
, __m128i __B
)
1260 return (__m128i
)__builtin_ia32_pcmpeqb128 ((__v16qi
)__A
, (__v16qi
)__B
);
1263 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1264 _mm_cmpeq_epi16 (__m128i __A
, __m128i __B
)
1266 return (__m128i
)__builtin_ia32_pcmpeqw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1269 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1270 _mm_cmpeq_epi32 (__m128i __A
, __m128i __B
)
1272 return (__m128i
)__builtin_ia32_pcmpeqd128 ((__v4si
)__A
, (__v4si
)__B
);
1275 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1276 _mm_cmplt_epi8 (__m128i __A
, __m128i __B
)
1278 return (__m128i
)__builtin_ia32_pcmpgtb128 ((__v16qi
)__B
, (__v16qi
)__A
);
1281 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1282 _mm_cmplt_epi16 (__m128i __A
, __m128i __B
)
1284 return (__m128i
)__builtin_ia32_pcmpgtw128 ((__v8hi
)__B
, (__v8hi
)__A
);
1287 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1288 _mm_cmplt_epi32 (__m128i __A
, __m128i __B
)
1290 return (__m128i
)__builtin_ia32_pcmpgtd128 ((__v4si
)__B
, (__v4si
)__A
);
1293 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1294 _mm_cmpgt_epi8 (__m128i __A
, __m128i __B
)
1296 return (__m128i
)__builtin_ia32_pcmpgtb128 ((__v16qi
)__A
, (__v16qi
)__B
);
1299 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1300 _mm_cmpgt_epi16 (__m128i __A
, __m128i __B
)
1302 return (__m128i
)__builtin_ia32_pcmpgtw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1305 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1306 _mm_cmpgt_epi32 (__m128i __A
, __m128i __B
)
1308 return (__m128i
)__builtin_ia32_pcmpgtd128 ((__v4si
)__A
, (__v4si
)__B
);
/* 16-bit element extract and insert.  An inline function is used when
   __OPTIMIZE__ is defined and a macro otherwise (presumably so the element
   index still reaches the builtin as a literal constant when nothing is
   being inlined -- confirm).  */
#ifdef __OPTIMIZE__
/* Return element __N of __A.  __v8hi elements are (signed) short, so the
   builtin's result is narrowed to unsigned short before it widens to the
   int return type; the upper 16 bits of the result are therefore zero
   rather than copies of the element's sign bit.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}

/* Pass __A (as __v8hi), the value __D and the index __N to
   __builtin_ia32_vec_set_v8hi and return the result as __m128i.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
}
#else
#define _mm_extract_epi16(A, N)                                         \
  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), \
                                                       (int)(N)))
#define _mm_insert_epi16(A, D, N)                                       \
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A),         \
                                          (int)(D), (int)(N)))
#endif
1331 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1332 _mm_max_epi16 (__m128i __A
, __m128i __B
)
1334 return (__m128i
)__builtin_ia32_pmaxsw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1337 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1338 _mm_max_epu8 (__m128i __A
, __m128i __B
)
1340 return (__m128i
)__builtin_ia32_pmaxub128 ((__v16qi
)__A
, (__v16qi
)__B
);
1343 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1344 _mm_min_epi16 (__m128i __A
, __m128i __B
)
1346 return (__m128i
)__builtin_ia32_pminsw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1349 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1350 _mm_min_epu8 (__m128i __A
, __m128i __B
)
1352 return (__m128i
)__builtin_ia32_pminub128 ((__v16qi
)__A
, (__v16qi
)__B
);
1355 extern __inline
int __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1356 _mm_movemask_epi8 (__m128i __A
)
1358 return __builtin_ia32_pmovmskb128 ((__v16qi
)__A
);
1361 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1362 _mm_mulhi_epu16 (__m128i __A
, __m128i __B
)
1364 return (__m128i
)__builtin_ia32_pmulhuw128 ((__v8hi
)__A
, (__v8hi
)__B
);
/* Integer shuffles.  Each forwards __A and a selector to the matching
   builtin.  An inline function is used when __OPTIMIZE__ is defined and a
   macro otherwise (presumably so the selector still reaches the builtin as
   a literal constant when nothing is being inlined -- confirm).  */
#ifdef __OPTIMIZE__
/* __A is passed as __v8hi to __builtin_ia32_pshufhw.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
}

/* __A is passed as __v8hi to __builtin_ia32_pshuflw.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
}

/* __A is passed as __v4si to __builtin_ia32_pshufd.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
}
#else
#define _mm_shufflehi_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shufflelo_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shuffle_epi32(A, N) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
#endif
1394 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1395 _mm_maskmoveu_si128 (__m128i __A
, __m128i __B
, char *__C
)
1397 __builtin_ia32_maskmovdqu ((__v16qi
)__A
, (__v16qi
)__B
, __C
);
1400 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1401 _mm_avg_epu8 (__m128i __A
, __m128i __B
)
1403 return (__m128i
)__builtin_ia32_pavgb128 ((__v16qi
)__A
, (__v16qi
)__B
);
1406 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1407 _mm_avg_epu16 (__m128i __A
, __m128i __B
)
1409 return (__m128i
)__builtin_ia32_pavgw128 ((__v8hi
)__A
, (__v8hi
)__B
);
1412 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1413 _mm_sad_epu8 (__m128i __A
, __m128i __B
)
1415 return (__m128i
)__builtin_ia32_psadbw128 ((__v16qi
)__A
, (__v16qi
)__B
);
/* Pass the destination pointer __A and the int __B to
   __builtin_ia32_movnti.  Nothing is returned.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
  __builtin_ia32_movnti (__A, __B);
}
1424 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1425 _mm_stream_si128 (__m128i
*__A
, __m128i __B
)
1427 __builtin_ia32_movntdq ((__v2di
*)__A
, (__v2di
)__B
);
1430 extern __inline
void __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1431 _mm_stream_pd (double *__A
, __m128d __B
)
1433 __builtin_ia32_movntpd (__A
, (__v2df
)__B
);
/* Pass the address __A to __builtin_ia32_clflush.  Nothing is returned.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
}
/* Invoke __builtin_ia32_lfence.  Takes no arguments and returns nothing.
   NOTE(review): the declarator line was absent from this definition; the
   name _mm_lfence and the (void) parameter list are restored to match the
   builtin being called -- confirm against the Intel intrinsic list.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}
/* Invoke __builtin_ia32_mfence.  Takes no arguments and returns nothing.
   NOTE(review): the declarator line was absent from this definition; the
   name _mm_mfence and the (void) parameter list are restored to match the
   builtin being called -- confirm against the Intel intrinsic list.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}
1454 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1455 _mm_cvtsi32_si128 (int __A
)
1457 return _mm_set_epi32 (0, 0, 0, __A
);
1461 /* Intel intrinsic. */
1462 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1463 _mm_cvtsi64_si128 (long long __A
)
1465 return _mm_set_epi64x (0, __A
);
1468 /* Microsoft intrinsic. */
1469 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1470 _mm_cvtsi64x_si128 (long long __A
)
1472 return _mm_set_epi64x (0, __A
);
1476 /* Casts between various SP, DP, INT vector types. Note that these do no
1477 conversion of values, they just change the type. */
1478 extern __inline __m128
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1479 _mm_castpd_ps(__m128d __A
)
1481 return (__m128
) __A
;
1484 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1485 _mm_castpd_si128(__m128d __A
)
1487 return (__m128i
) __A
;
1490 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1491 _mm_castps_pd(__m128 __A
)
1493 return (__m128d
) __A
;
1496 extern __inline __m128i
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1497 _mm_castps_si128(__m128 __A
)
1499 return (__m128i
) __A
;
1502 extern __inline __m128
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1503 _mm_castsi128_ps(__m128i __A
)
1505 return (__m128
) __A
;
1508 extern __inline __m128d
__attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
1509 _mm_castsi128_pd(__m128i __A
)
1511 return (__m128d
) __A
;
1514 #endif /* __SSE2__ */
1516 #endif /* _EMMINTRIN_H_INCLUDED */