]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/mmintrin.h
9b84bb60e0af05e8092d4bcc3225dc762c62b1de
[thirdparty/gcc.git] / gcc / config / i386 / mmintrin.h
1 /* Copyright (C) 2002-2024 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 /* Implemented from the specification included in the Intel C++ Compiler
25 User Guide and Reference, version 9.0. */
26
27 #ifndef _MMINTRIN_H_INCLUDED
28 #define _MMINTRIN_H_INCLUDED
29
/* If the required target features are not already enabled, save the
   current options and enable them for the rest of this header.
   __DISABLE_MMX__ marks that the saved options must be popped again.  */
#if (defined __x86_64__ && !defined __SSE__) || !defined __MMX__
#pragma GCC push_options
#if defined __MMX_WITH_SSE__
#pragma GCC target("sse2")
#elif defined __x86_64__
#pragma GCC target("sse,mmx")
#else
#pragma GCC target("mmx")
#endif
#define __DISABLE_MMX__
#endif /* (__x86_64__ && !__SSE__) || !__MMX__ */
41
/* Public vector types.  They are marked __may_alias__ because the Intel
   API is flexible enough that aliasing with other vector types, and with
   their scalar components, must be allowed.  */
typedef int __m64
  __attribute__ ((__vector_size__ (8), __may_alias__));
typedef int __m32
  __attribute__ ((__vector_size__ (4), __may_alias__));
typedef short __m16
  __attribute__ ((__vector_size__ (2), __may_alias__));
47
/* Unaligned (__aligned__ (1)) versions of the same vector types.  */
typedef int __m64_u
  __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
typedef int __m32_u
  __attribute__ ((__vector_size__ (4), __may_alias__, __aligned__ (1)));
typedef short __m16_u
  __attribute__ ((__vector_size__ (2), __may_alias__, __aligned__ (1)));
54
/* Element-typed 8-byte vectors used internally by the intrinsics below:
   2 x int, 4 x short, 8 x char, 1 x long long and 2 x float.  */
typedef int       __v2si __attribute__ ((__vector_size__ (8)));
typedef short     __v4hi __attribute__ ((__vector_size__ (8)));
typedef char      __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
typedef float     __v2sf __attribute__ ((__vector_size__ (8)));
61
/* Empty the multimedia state by invoking the EMMS builtin.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
  return;
}
68
/* Alternate name for _mm_empty; invokes the same EMMS builtin.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
  __builtin_ia32_emms ();
}
74
75 /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
76 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77 _mm_cvtsi32_si64 (int __i)
78 {
79 return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
80 }
81
82 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83 _m_from_int (int __i)
84 {
85 return _mm_cvtsi32_si64 (__i);
86 }
87
88 #ifdef __x86_64__
89 /* Convert I to a __m64 object. */
90
91 /* Intel intrinsic. */
92 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
93 _m_from_int64 (long long __i)
94 {
95 return (__m64) __i;
96 }
97
98 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
99 _mm_cvtsi64_m64 (long long __i)
100 {
101 return (__m64) __i;
102 }
103
104 /* Microsoft intrinsic. */
105 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
106 _mm_cvtsi64x_si64 (long long __i)
107 {
108 return (__m64) __i;
109 }
110
111 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
112 _mm_set_pi64x (long long __i)
113 {
114 return (__m64) __i;
115 }
116 #endif
117
118 /* Convert the lower 32 bits of the __m64 object into an integer. */
119 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120 _mm_cvtsi64_si32 (__m64 __i)
121 {
122 return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
123 }
124
125 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126 _m_to_int (__m64 __i)
127 {
128 return _mm_cvtsi64_si32 (__i);
129 }
130
131 #ifdef __x86_64__
/* Convert the __m64 object to a 64-bit integer.  */
133
134 /* Intel intrinsic. */
135 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
136 _m_to_int64 (__m64 __i)
137 {
138 return (long long)__i;
139 }
140
141 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
142 _mm_cvtm64_si64 (__m64 __i)
143 {
144 return (long long)__i;
145 }
146
147 /* Microsoft intrinsic. */
148 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
149 _mm_cvtsi64_si64x (__m64 __i)
150 {
151 return (long long)__i;
152 }
153 #endif
154
155 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
156 the result, and the four 16-bit values from M2 into the upper four 8-bit
157 values of the result, all with signed saturation. */
158 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
159 _mm_packs_pi16 (__m64 __m1, __m64 __m2)
160 {
161 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
162 }
163
164 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
165 _m_packsswb (__m64 __m1, __m64 __m2)
166 {
167 return _mm_packs_pi16 (__m1, __m2);
168 }
169
170 /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
171 the result, and the two 32-bit values from M2 into the upper two 16-bit
172 values of the result, all with signed saturation. */
173 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
174 _mm_packs_pi32 (__m64 __m1, __m64 __m2)
175 {
176 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
177 }
178
179 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
180 _m_packssdw (__m64 __m1, __m64 __m2)
181 {
182 return _mm_packs_pi32 (__m1, __m2);
183 }
184
185 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
186 the result, and the four 16-bit values from M2 into the upper four 8-bit
187 values of the result, all with unsigned saturation. */
188 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
189 _mm_packs_pu16 (__m64 __m1, __m64 __m2)
190 {
191 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
192 }
193
194 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
195 _m_packuswb (__m64 __m1, __m64 __m2)
196 {
197 return _mm_packs_pu16 (__m1, __m2);
198 }
199
200 /* Interleave the four 8-bit values from the high half of M1 with the four
201 8-bit values from the high half of M2. */
202 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
203 _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
204 {
205 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
206 }
207
208 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
209 _m_punpckhbw (__m64 __m1, __m64 __m2)
210 {
211 return _mm_unpackhi_pi8 (__m1, __m2);
212 }
213
214 /* Interleave the two 16-bit values from the high half of M1 with the two
215 16-bit values from the high half of M2. */
216 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
217 _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
218 {
219 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
220 }
221
222 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
223 _m_punpckhwd (__m64 __m1, __m64 __m2)
224 {
225 return _mm_unpackhi_pi16 (__m1, __m2);
226 }
227
228 /* Interleave the 32-bit value from the high half of M1 with the 32-bit
229 value from the high half of M2. */
230 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
231 _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
232 {
233 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
234 }
235
236 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
237 _m_punpckhdq (__m64 __m1, __m64 __m2)
238 {
239 return _mm_unpackhi_pi32 (__m1, __m2);
240 }
241
242 /* Interleave the four 8-bit values from the low half of M1 with the four
243 8-bit values from the low half of M2. */
244 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
245 _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
246 {
247 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
248 }
249
250 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
251 _m_punpcklbw (__m64 __m1, __m64 __m2)
252 {
253 return _mm_unpacklo_pi8 (__m1, __m2);
254 }
255
256 /* Interleave the two 16-bit values from the low half of M1 with the two
257 16-bit values from the low half of M2. */
258 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
259 _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
260 {
261 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
262 }
263
264 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
265 _m_punpcklwd (__m64 __m1, __m64 __m2)
266 {
267 return _mm_unpacklo_pi16 (__m1, __m2);
268 }
269
270 /* Interleave the 32-bit value from the low half of M1 with the 32-bit
271 value from the low half of M2. */
272 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
273 _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
274 {
275 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
276 }
277
278 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
279 _m_punpckldq (__m64 __m1, __m64 __m2)
280 {
281 return _mm_unpacklo_pi32 (__m1, __m2);
282 }
283
284 /* Add the 8-bit values in M1 to the 8-bit values in M2. */
285 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
286 _mm_add_pi8 (__m64 __m1, __m64 __m2)
287 {
288 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
289 }
290
291 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
292 _m_paddb (__m64 __m1, __m64 __m2)
293 {
294 return _mm_add_pi8 (__m1, __m2);
295 }
296
297 /* Add the 16-bit values in M1 to the 16-bit values in M2. */
298 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
299 _mm_add_pi16 (__m64 __m1, __m64 __m2)
300 {
301 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
302 }
303
304 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
305 _m_paddw (__m64 __m1, __m64 __m2)
306 {
307 return _mm_add_pi16 (__m1, __m2);
308 }
309
310 /* Add the 32-bit values in M1 to the 32-bit values in M2. */
311 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
312 _mm_add_pi32 (__m64 __m1, __m64 __m2)
313 {
314 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
315 }
316
317 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
318 _m_paddd (__m64 __m1, __m64 __m2)
319 {
320 return _mm_add_pi32 (__m1, __m2);
321 }
322
323 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
324 #ifndef __SSE2__
325 #pragma GCC push_options
326 #ifdef __MMX_WITH_SSE__
327 #pragma GCC target("sse2")
328 #else
329 #pragma GCC target("sse2,mmx")
330 #endif
331 #define __DISABLE_SSE2__
332 #endif /* __SSE2__ */
333
334 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
335 _mm_add_si64 (__m64 __m1, __m64 __m2)
336 {
337 return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
338 }
339 #ifdef __DISABLE_SSE2__
340 #undef __DISABLE_SSE2__
341 #pragma GCC pop_options
342 #endif /* __DISABLE_SSE2__ */
343
344 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
345 saturated arithmetic. */
346 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
347 _mm_adds_pi8 (__m64 __m1, __m64 __m2)
348 {
349 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
350 }
351
352 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
353 _m_paddsb (__m64 __m1, __m64 __m2)
354 {
355 return _mm_adds_pi8 (__m1, __m2);
356 }
357
358 /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
359 saturated arithmetic. */
360 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
361 _mm_adds_pi16 (__m64 __m1, __m64 __m2)
362 {
363 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
364 }
365
366 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
367 _m_paddsw (__m64 __m1, __m64 __m2)
368 {
369 return _mm_adds_pi16 (__m1, __m2);
370 }
371
372 /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
373 saturated arithmetic. */
374 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
375 _mm_adds_pu8 (__m64 __m1, __m64 __m2)
376 {
377 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
378 }
379
380 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
381 _m_paddusb (__m64 __m1, __m64 __m2)
382 {
383 return _mm_adds_pu8 (__m1, __m2);
384 }
385
386 /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
387 saturated arithmetic. */
388 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
389 _mm_adds_pu16 (__m64 __m1, __m64 __m2)
390 {
391 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
392 }
393
394 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
395 _m_paddusw (__m64 __m1, __m64 __m2)
396 {
397 return _mm_adds_pu16 (__m1, __m2);
398 }
399
400 /* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
401 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
402 _mm_sub_pi8 (__m64 __m1, __m64 __m2)
403 {
404 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
405 }
406
407 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
408 _m_psubb (__m64 __m1, __m64 __m2)
409 {
410 return _mm_sub_pi8 (__m1, __m2);
411 }
412
413 /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
414 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
415 _mm_sub_pi16 (__m64 __m1, __m64 __m2)
416 {
417 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
418 }
419
420 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
421 _m_psubw (__m64 __m1, __m64 __m2)
422 {
423 return _mm_sub_pi16 (__m1, __m2);
424 }
425
426 /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
427 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428 _mm_sub_pi32 (__m64 __m1, __m64 __m2)
429 {
430 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
431 }
432
433 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
434 _m_psubd (__m64 __m1, __m64 __m2)
435 {
436 return _mm_sub_pi32 (__m1, __m2);
437 }
438
439 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
440 #ifndef __SSE2__
441 #pragma GCC push_options
442 #ifdef __MMX_WITH_SSE__
443 #pragma GCC target("sse2")
444 #else
445 #pragma GCC target("sse2,mmx")
446 #endif
447 #define __DISABLE_SSE2__
448 #endif /* __SSE2__ */
449
450 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
451 _mm_sub_si64 (__m64 __m1, __m64 __m2)
452 {
453 return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
454 }
455 #ifdef __DISABLE_SSE2__
456 #undef __DISABLE_SSE2__
457 #pragma GCC pop_options
458 #endif /* __DISABLE_SSE2__ */
459
460 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
461 saturating arithmetic. */
462 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
463 _mm_subs_pi8 (__m64 __m1, __m64 __m2)
464 {
465 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
466 }
467
468 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
469 _m_psubsb (__m64 __m1, __m64 __m2)
470 {
471 return _mm_subs_pi8 (__m1, __m2);
472 }
473
474 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
475 signed saturating arithmetic. */
476 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
477 _mm_subs_pi16 (__m64 __m1, __m64 __m2)
478 {
479 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
480 }
481
482 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
483 _m_psubsw (__m64 __m1, __m64 __m2)
484 {
485 return _mm_subs_pi16 (__m1, __m2);
486 }
487
488 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
489 unsigned saturating arithmetic. */
490 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
491 _mm_subs_pu8 (__m64 __m1, __m64 __m2)
492 {
493 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
494 }
495
496 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
497 _m_psubusb (__m64 __m1, __m64 __m2)
498 {
499 return _mm_subs_pu8 (__m1, __m2);
500 }
501
502 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
503 unsigned saturating arithmetic. */
504 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
505 _mm_subs_pu16 (__m64 __m1, __m64 __m2)
506 {
507 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
508 }
509
510 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
511 _m_psubusw (__m64 __m1, __m64 __m2)
512 {
513 return _mm_subs_pu16 (__m1, __m2);
514 }
515
516 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
517 four 32-bit intermediate results, which are then summed by pairs to
518 produce two 32-bit results. */
519 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
520 _mm_madd_pi16 (__m64 __m1, __m64 __m2)
521 {
522 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
523 }
524
525 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
526 _m_pmaddwd (__m64 __m1, __m64 __m2)
527 {
528 return _mm_madd_pi16 (__m1, __m2);
529 }
530
531 /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
532 M2 and produce the high 16 bits of the 32-bit results. */
533 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534 _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
535 {
536 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
537 }
538
539 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
540 _m_pmulhw (__m64 __m1, __m64 __m2)
541 {
542 return _mm_mulhi_pi16 (__m1, __m2);
543 }
544
545 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
546 the low 16 bits of the results. */
547 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
548 _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
549 {
550 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
551 }
552
553 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
554 _m_pmullw (__m64 __m1, __m64 __m2)
555 {
556 return _mm_mullo_pi16 (__m1, __m2);
557 }
558
559 /* Shift four 16-bit values in M left by COUNT. */
560 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
561 _mm_sll_pi16 (__m64 __m, __m64 __count)
562 {
563 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
564 }
565
566 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
567 _m_psllw (__m64 __m, __m64 __count)
568 {
569 return _mm_sll_pi16 (__m, __count);
570 }
571
572 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
573 _mm_slli_pi16 (__m64 __m, int __count)
574 {
575 return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
576 }
577
578 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
579 _m_psllwi (__m64 __m, int __count)
580 {
581 return _mm_slli_pi16 (__m, __count);
582 }
583
584 /* Shift two 32-bit values in M left by COUNT. */
585 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
586 _mm_sll_pi32 (__m64 __m, __m64 __count)
587 {
588 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
589 }
590
591 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
592 _m_pslld (__m64 __m, __m64 __count)
593 {
594 return _mm_sll_pi32 (__m, __count);
595 }
596
597 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
598 _mm_slli_pi32 (__m64 __m, int __count)
599 {
600 return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
601 }
602
603 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
604 _m_pslldi (__m64 __m, int __count)
605 {
606 return _mm_slli_pi32 (__m, __count);
607 }
608
609 /* Shift the 64-bit value in M left by COUNT. */
610 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
611 _mm_sll_si64 (__m64 __m, __m64 __count)
612 {
613 return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
614 }
615
616 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
617 _m_psllq (__m64 __m, __m64 __count)
618 {
619 return _mm_sll_si64 (__m, __count);
620 }
621
622 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
623 _mm_slli_si64 (__m64 __m, int __count)
624 {
625 return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
626 }
627
628 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
629 _m_psllqi (__m64 __m, int __count)
630 {
631 return _mm_slli_si64 (__m, __count);
632 }
633
634 /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
635 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
636 _mm_sra_pi16 (__m64 __m, __m64 __count)
637 {
638 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
639 }
640
641 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
642 _m_psraw (__m64 __m, __m64 __count)
643 {
644 return _mm_sra_pi16 (__m, __count);
645 }
646
647 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
648 _mm_srai_pi16 (__m64 __m, int __count)
649 {
650 return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
651 }
652
653 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654 _m_psrawi (__m64 __m, int __count)
655 {
656 return _mm_srai_pi16 (__m, __count);
657 }
658
659 /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
660 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
661 _mm_sra_pi32 (__m64 __m, __m64 __count)
662 {
663 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
664 }
665
666 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
667 _m_psrad (__m64 __m, __m64 __count)
668 {
669 return _mm_sra_pi32 (__m, __count);
670 }
671
672 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
673 _mm_srai_pi32 (__m64 __m, int __count)
674 {
675 return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
676 }
677
678 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
679 _m_psradi (__m64 __m, int __count)
680 {
681 return _mm_srai_pi32 (__m, __count);
682 }
683
684 /* Shift four 16-bit values in M right by COUNT; shift in zeros. */
685 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
686 _mm_srl_pi16 (__m64 __m, __m64 __count)
687 {
688 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
689 }
690
691 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
692 _m_psrlw (__m64 __m, __m64 __count)
693 {
694 return _mm_srl_pi16 (__m, __count);
695 }
696
697 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
698 _mm_srli_pi16 (__m64 __m, int __count)
699 {
700 return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
701 }
702
703 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
704 _m_psrlwi (__m64 __m, int __count)
705 {
706 return _mm_srli_pi16 (__m, __count);
707 }
708
709 /* Shift two 32-bit values in M right by COUNT; shift in zeros. */
710 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
711 _mm_srl_pi32 (__m64 __m, __m64 __count)
712 {
713 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
714 }
715
716 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
717 _m_psrld (__m64 __m, __m64 __count)
718 {
719 return _mm_srl_pi32 (__m, __count);
720 }
721
722 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
723 _mm_srli_pi32 (__m64 __m, int __count)
724 {
725 return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
726 }
727
728 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
729 _m_psrldi (__m64 __m, int __count)
730 {
731 return _mm_srli_pi32 (__m, __count);
732 }
733
734 /* Shift the 64-bit value in M left by COUNT; shift in zeros. */
735 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
736 _mm_srl_si64 (__m64 __m, __m64 __count)
737 {
738 return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
739 }
740
741 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
742 _m_psrlq (__m64 __m, __m64 __count)
743 {
744 return _mm_srl_si64 (__m, __count);
745 }
746
747 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
748 _mm_srli_si64 (__m64 __m, int __count)
749 {
750 return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
751 }
752
753 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
754 _m_psrlqi (__m64 __m, int __count)
755 {
756 return _mm_srli_si64 (__m, __count);
757 }
758
759 /* Bit-wise AND the 64-bit values in M1 and M2. */
760 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
761 _mm_and_si64 (__m64 __m1, __m64 __m2)
762 {
763 return __builtin_ia32_pand (__m1, __m2);
764 }
765
766 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
767 _m_pand (__m64 __m1, __m64 __m2)
768 {
769 return _mm_and_si64 (__m1, __m2);
770 }
771
772 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
773 64-bit value in M2. */
774 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
775 _mm_andnot_si64 (__m64 __m1, __m64 __m2)
776 {
777 return __builtin_ia32_pandn (__m1, __m2);
778 }
779
780 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
781 _m_pandn (__m64 __m1, __m64 __m2)
782 {
783 return _mm_andnot_si64 (__m1, __m2);
784 }
785
786 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
787 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
788 _mm_or_si64 (__m64 __m1, __m64 __m2)
789 {
790 return __builtin_ia32_por (__m1, __m2);
791 }
792
793 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
794 _m_por (__m64 __m1, __m64 __m2)
795 {
796 return _mm_or_si64 (__m1, __m2);
797 }
798
799 /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
800 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
801 _mm_xor_si64 (__m64 __m1, __m64 __m2)
802 {
803 return __builtin_ia32_pxor (__m1, __m2);
804 }
805
806 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
807 _m_pxor (__m64 __m1, __m64 __m2)
808 {
809 return _mm_xor_si64 (__m1, __m2);
810 }
811
812 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
813 test is true and zero if false. */
814 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
815 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
816 {
817 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
818 }
819
820 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
821 _m_pcmpeqb (__m64 __m1, __m64 __m2)
822 {
823 return _mm_cmpeq_pi8 (__m1, __m2);
824 }
825
826 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
827 _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
828 {
829 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
830 }
831
832 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
833 _m_pcmpgtb (__m64 __m1, __m64 __m2)
834 {
835 return _mm_cmpgt_pi8 (__m1, __m2);
836 }
837
838 /* Compare four 16-bit values. The result of the comparison is 0xFFFF if
839 the test is true and zero if false. */
840 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
841 _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
842 {
843 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
844 }
845
846 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
847 _m_pcmpeqw (__m64 __m1, __m64 __m2)
848 {
849 return _mm_cmpeq_pi16 (__m1, __m2);
850 }
851
852 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
853 _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
854 {
855 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
856 }
857
858 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
859 _m_pcmpgtw (__m64 __m1, __m64 __m2)
860 {
861 return _mm_cmpgt_pi16 (__m1, __m2);
862 }
863
864 /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
865 the test is true and zero if false. */
866 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
867 _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
868 {
869 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
870 }
871
872 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
873 _m_pcmpeqd (__m64 __m1, __m64 __m2)
874 {
875 return _mm_cmpeq_pi32 (__m1, __m2);
876 }
877
878 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
879 _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
880 {
881 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
882 }
883
884 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
885 _m_pcmpgtd (__m64 __m1, __m64 __m2)
886 {
887 return _mm_cmpgt_pi32 (__m1, __m2);
888 }
889
890 /* Creates a 64-bit zero. */
891 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
892 _mm_setzero_si64 (void)
893 {
894 return (__m64)0LL;
895 }
896
897 /* Creates a vector of two 32-bit values; I0 is least significant. */
898 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
899 _mm_set_pi32 (int __i1, int __i0)
900 {
901 return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
902 }
903
904 /* Creates a vector of four 16-bit values; W0 is least significant. */
905 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
906 _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
907 {
908 return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
909 }
910
911 /* Creates a vector of eight 8-bit values; B0 is least significant. */
912 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
913 _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
914 char __b3, char __b2, char __b1, char __b0)
915 {
916 return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
917 __b4, __b5, __b6, __b7);
918 }
919
920 /* Similar, but with the arguments in reverse order. */
921 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
922 _mm_setr_pi32 (int __i0, int __i1)
923 {
924 return _mm_set_pi32 (__i1, __i0);
925 }
926
927 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
928 _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
929 {
930 return _mm_set_pi16 (__w3, __w2, __w1, __w0);
931 }
932
933 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
934 _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
935 char __b4, char __b5, char __b6, char __b7)
936 {
937 return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
938 }
939
940 /* Creates a vector of two 32-bit values, both elements containing I. */
941 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
942 _mm_set1_pi32 (int __i)
943 {
944 return _mm_set_pi32 (__i, __i);
945 }
946
947 /* Creates a vector of four 16-bit values, all elements containing W. */
948 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
949 _mm_set1_pi16 (short __w)
950 {
951 return _mm_set_pi16 (__w, __w, __w, __w);
952 }
953
954 /* Creates a vector of eight 8-bit values, all elements containing B. */
955 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
956 _mm_set1_pi8 (char __b)
957 {
958 return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
959 }
/* Restore the options saved at the top of this header, if they were
   pushed there (signalled by __DISABLE_MMX__).  */
#if defined __DISABLE_MMX__
#undef __DISABLE_MMX__
#pragma GCC pop_options
#endif /* __DISABLE_MMX__ */
964
965 #endif /* _MMINTRIN_H_INCLUDED */