/* Copyright (C) 2003-2020 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.
   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.
   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#endif
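
/* As a concrete illustration (an editorial sketch, not part of the
   original header): x86_64 SSSE3 code along the lines of

     #include <tmmintrin.h>

     __m128i
     abs_then_align (__m128i a, __m128i b)
     {
       __m128i c = _mm_abs_epi16 (a);
       return _mm_alignr_epi8 (c, b, 4);
     }

   (the function name here is hypothetical) can be recompiled for
   powerpc64le against this header instead of the x86 <tmmintrin.h>;
   the definitions below map each intrinsic onto AltiVec/VSX
   operations.  */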

#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_

#include <altivec.h>
#include <assert.h>

/* We need definitions from the SSE header files.  */
#include <pmmintrin.h>

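/* SSSE3 absolute-value operations: each element of the result is the
   absolute value of the corresponding signed 16-, 32- or 8-bit element
   of __A, computed with vec_abs.  The _pi variants operate on 64-bit
   __m64 operands.  */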
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16 (__m128i __A)
{
  return (__m128i) vec_abs ((__v8hi) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32 (__m128i __A)
{
  return (__m128i) vec_abs ((__v4si) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8 (__m128i __A)
{
  return (__m128i) vec_abs ((__v16qi) __A);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16 (__m64 __A)
{
  __v8hi __B = (__v8hi) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32 (__m64 __A)
{
  __v4si __B = (__v4si) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8 (__m64 __A)
{
  __v16qi __B = (__v16qi) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}

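/* _mm_alignr_epi8: treat __A:__B as a 32-byte value (__A in the high
   half), shift it right by __count bytes and return the low 16 bytes.
   When __count is a compile-time constant below 16 this reduces to a
   single vec_sld, byte-reversed as needed on little-endian.  */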
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count)
{
  if (__builtin_constant_p (__count) && __count < 16)
    {
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
      __B = (__m128i) vec_reve ((__v16qu) __B);
#endif
      __A = (__m128i) vec_sld ((__v16qu) __B, (__v16qu) __A, __count);
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
#endif
      return __A;
    }

  if (__count == 0)
    return __B;

  if (__count >= 16)
    {
      if (__count >= 32)
        {
          const __v16qu zero = { 0 };
          return (__m128i) zero;
        }
      else
        {
          const __v16qu __shift =
            vec_splats ((unsigned char) ((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
          return (__m128i) vec_sro ((__v16qu) __A, __shift);
#else
          return (__m128i) vec_slo ((__v16qu) __A, __shift);
#endif
        }
    }
  else
    {
      const __v16qu __shiftA =
        vec_splats ((unsigned char) ((16 - __count) * 8));
      const __v16qu __shiftB = vec_splats ((unsigned char) (__count * 8));
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_slo ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_sro ((__v16qu) __B, __shiftB);
#else
      __A = (__m128i) vec_sro ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_slo ((__v16qu) __B, __shiftB);
#endif
      return (__m128i) vec_or ((__v16qu) __A, (__v16qu) __B);
    }
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8 (__m64 __A, __m64 __B, unsigned int __count)
{
  if (__count < 16)
    {
      __v2du __C = { __B, __A };
#ifdef __LITTLE_ENDIAN__
      const __v4su __shift = { __count << 3, 0, 0, 0 };
      __C = (__v2du) vec_sro ((__v16qu) __C, (__v16qu) __shift);
#else
      const __v4su __shift = { 0, 0, 0, __count << 3 };
      __C = (__v2du) vec_slo ((__v16qu) __C, (__v16qu) __shift);
#endif
      return (__m64) __C[0];
    }
  else
    {
      const __m64 __zero = { 0 };
      return __zero;
    }
}

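/* Horizontal-add operations: each element of the result is the sum of
   a pair of adjacent elements, taken from __A for the low half of the
   result and from __B for the high half; the _mm_hadds_* forms
   saturate the signed 16-bit sums.  */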
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_add (__C, __D);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
  __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
  __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
  return (__m128i) vec_add (__C, __D);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16 (__m64 __A, __m64 __B)
{
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_add (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32 (__m64 __A, __m64 __B)
{
  __v4si __C = (__v4si) (__v2du) { __A, __B };
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
  __v4si __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_add (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16 (__m128i __A, __m128i __B)
{
  __v4si __C = { 0 }, __D = { 0 };
  __C = vec_sum4s ((__v8hi) __A, __C);
  __D = vec_sum4s ((__v8hi) __B, __D);
  __C = (__v4si) vec_packs (__C, __D);
  return (__m128i) __C;
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16 (__m64 __A, __m64 __B)
{
  const __v4si __zero = { 0 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v4si __D = vec_sum4s (__C, __zero);
  __C = vec_packs (__D, __D);
  return (__m64) ((__v2du) __C)[1];
}

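/* Horizontal-subtract operations: like the horizontal adds above, but
   each result element is the even element of a pair minus the odd
   element; the _mm_hsubs_* forms saturate the signed 16-bit
   differences.  */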
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_sub (__C, __D);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
  __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
  __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
  return (__m128i) vec_sub (__C, __D);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_sub (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
  __v4si __C = (__v4si) (__v2du) { __A, __B };
  __v4si __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_sub (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_subs (__C, __D);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v8hi __D = vec_perm (__C, __C, __P);
  __v8hi __E = vec_perm (__C, __C, __Q);
  __C = vec_subs (__D, __E);
  return (__m64) ((__v2du) __C)[1];
}

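/* _mm_shuffle_epi8 / _mm_shuffle_pi8: shuffle the bytes of __A using
   the low bits of the corresponding byte of __B as the index; a byte
   of __B with its high bit set selects zero instead.  Implemented as
   vec_perm followed by vec_sel against zero.  */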
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8 (__m128i __A, __m128i __B)
{
  const __v16qi __zero = { 0 };
  __vector __bool char __select = vec_cmplt ((__v16qi) __B, __zero);
  __v16qi __C = vec_perm ((__v16qi) __A, (__v16qi) __A, (__v16qu) __B);
  return (__m128i) vec_sel (__C, __zero, __select);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8 (__m64 __A, __m64 __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __C = (__v16qi) (__v2du) { __A, __A };
  __v16qi __D = (__v16qi) (__v2du) { __B, __B };
  __vector __bool char __select = vec_cmplt ((__v16qi) __D, __zero);
  __C = vec_perm ((__v16qi) __C, (__v16qi) __C, (__v16qu) __D);
  __C = vec_sel (__C, __zero, __select);
  return (__m64) ((__v2du) (__C))[0];
}

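/* Sign operations (_mm_sign_*): negate each element of __A where the
   corresponding element of __B is negative, zero it where __B is zero,
   and keep it unchanged where __B is positive.  The two compares build
   a per-element multiplier of -1, 0 or 1.  */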
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8 (__m128i __A, __m128i __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __selectneg = (__v16qi) vec_cmplt ((__v16qi) __B, __zero);
  __v16qi __selectpos =
    (__v16qi) vec_neg ((__v16qi) vec_cmpgt ((__v16qi) __B, __zero));
  __v16qi __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16 (__m128i __A, __m128i __B)
{
  const __v8hi __zero = { 0 };
  __v8hi __selectneg = (__v8hi) vec_cmplt ((__v8hi) __B, __zero);
  __v8hi __selectpos =
    (__v8hi) vec_neg ((__v8hi) vec_cmpgt ((__v8hi) __B, __zero));
  __v8hi __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32 (__m128i __A, __m128i __B)
{
  const __v4si __zero = { 0 };
  __v4si __selectneg = (__v4si) vec_cmplt ((__v4si) __B, __zero);
  __v4si __selectpos =
    (__v4si) vec_neg ((__v4si) vec_cmpgt ((__v4si) __B, __zero));
  __v4si __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8 (__m64 __A, __m64 __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __C = (__v16qi) (__v2du) { __A, __A };
  __v16qi __D = (__v16qi) (__v2du) { __B, __B };
  __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16 (__m64 __A, __m64 __B)
{
  const __v8hi __zero = { 0 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32 (__m64 __A, __m64 __B)
{
  const __v4si __zero = { 0 };
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}

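/* _mm_maddubs_epi16 / _mm_maddubs_pi16: multiply the unsigned bytes of
   __A by the corresponding signed bytes of __B and add adjacent pairs
   of the 16-bit products with signed saturation.  The vec_and with
   0x00ff turns the sign-extended unpack of __A back into unsigned byte
   values.  */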
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __A, __m128i __B)
{
  __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __v8hi __C = vec_and (vec_unpackh ((__v16qi) __A), __unsigned);
  __v8hi __D = vec_and (vec_unpackl ((__v16qi) __A), __unsigned);
  __v8hi __E = vec_unpackh ((__v16qi) __B);
  __v8hi __F = vec_unpackl ((__v16qi) __B);
  __C = vec_mul (__C, __E);
  __D = vec_mul (__D, __F);
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __E = vec_perm (__C, __D, __odds);
  __F = vec_perm (__C, __D, __evens);
  return (__m128i) vec_adds (__E, __F);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16 (__m64 __A, __m64 __B)
{
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  __C = vec_unpackl ((__v16qi) __C);
  const __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __C = vec_and (__C, __unsigned);
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __D = vec_unpackl ((__v16qi) __D);
  __D = vec_mul (__C, __D);
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __C = vec_perm (__D, __D, __odds);
  __D = vec_perm (__D, __D, __evens);
  __C = vec_adds (__C, __D);
  return (__m64) ((__v2du) (__C))[0];
}

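/* _mm_mulhrs_epi16 / _mm_mulhrs_pi16: multiply the signed 16-bit
   elements, then round and scale the 32-bit products back to 16 bits
   as ((product >> 14) + 1) >> 1, i.e. the rounded high half.  */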
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16 (__m128i __A, __m128i __B)
{
  __v4si __C = vec_unpackh ((__v8hi) __A);
  __v4si __D = vec_unpackh ((__v8hi) __B);
  __C = vec_mul (__C, __D);
  __D = vec_unpackl ((__v8hi) __A);
  __v4si __E = vec_unpackl ((__v8hi) __B);
  __D = vec_mul (__D, __E);
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  __D = vec_sr (__D, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __D = vec_add (__D, __ones);
  __D = vec_sr (__D, (__v4su) __ones);
  return (__m128i) vec_pack (__C, __D);
}

extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16 (__m64 __A, __m64 __B)
{
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __C = vec_unpackh ((__v8hi) __C);
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __D = vec_unpackh ((__v8hi) __D);
  __C = vec_mul (__C, __D);
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __v8hi __E = vec_pack (__C, __D);
  return (__m64) ((__v2du) (__E))[0];
}

#endif