]>
Commit | Line | Data |
---|---|---|
8ae8bad7 CX |
1 | /* Intrinsics for Loongson MultiMedia extension Instructions operations. |
2 | ||
a5544970 | 3 | Copyright (C) 2008-2019 Free Software Foundation, Inc. |
8ae8bad7 CX |
4 | Contributed by CodeSourcery. |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published | |
10 | by the Free Software Foundation; either version 3, or (at your | |
11 | option) any later version. | |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
15 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
16 | License for more details. | |
17 | ||
18 | Under Section 7 of GPL version 3, you are granted additional | |
19 | permissions described in the GCC Runtime Library Exception, version | |
20 | 3.1, as published by the Free Software Foundation. | |
21 | ||
22 | You should have received a copy of the GNU General Public License and | |
23 | a copy of the GCC Runtime Library Exception along with this program; | |
24 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
25 | <http://www.gnu.org/licenses/>. */ | |
26 | ||
27 | #ifndef _GCC_LOONGSON_MMIINTRIN_H | |
28 | #define _GCC_LOONGSON_MMIINTRIN_H | |
29 | ||
30 | #if !defined(__mips_loongson_mmi) | |
31 | # error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\
32 | loongson-mmiintrin.h" | |
33 | #endif | |
34 | ||
35 | #ifdef __cplusplus | |
36 | extern "C" { | |
37 | #endif | |
38 | ||
39 | #include <stdint.h> | |
40 | ||
/* 8-byte vectors with unsigned lanes: bytes, halfwords and words.  */
typedef uint8_t __attribute__ ((vector_size (8))) uint8x8_t;
typedef uint16_t __attribute__ ((vector_size (8))) uint16x4_t;
typedef uint32_t __attribute__ ((vector_size (8))) uint32x2_t;
45 | ||
/* 8-byte vectors with signed lanes: bytes, halfwords and words.  */
typedef int8_t __attribute__ ((vector_size (8))) int8x8_t;
typedef int16_t __attribute__ ((vector_size (8))) int16x4_t;
typedef int32_t __attribute__ ((vector_size (8))) int32x2_t;
50 | ||
51 | /* SIMD intrinsics. | |
52 | Unless otherwise noted, calls to the functions below will expand into | |
53 | precisely one machine instruction, modulo any moves required to | |
54 | satisfy register allocation constraints. */ | |
55 | ||
56 | /* Pack with signed saturation. */ | |
57 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
58 | packsswh (int32x2_t s, int32x2_t t) | |
59 | { | |
60 | return __builtin_loongson_packsswh (s, t); | |
61 | } | |
62 | ||
63 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
64 | packsshb (int16x4_t s, int16x4_t t) | |
65 | { | |
66 | return __builtin_loongson_packsshb (s, t); | |
67 | } | |
68 | ||
69 | /* Pack with unsigned saturation. */ | |
70 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
71 | packushb (uint16x4_t s, uint16x4_t t) | |
72 | { | |
73 | return __builtin_loongson_packushb (s, t); | |
74 | } | |
75 | ||
76 | /* Vector addition, treating overflow by wraparound. */ | |
77 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
78 | paddw_u (uint32x2_t s, uint32x2_t t) | |
79 | { | |
80 | return __builtin_loongson_paddw_u (s, t); | |
81 | } | |
82 | ||
83 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
84 | paddh_u (uint16x4_t s, uint16x4_t t) | |
85 | { | |
86 | return __builtin_loongson_paddh_u (s, t); | |
87 | } | |
88 | ||
89 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
90 | paddb_u (uint8x8_t s, uint8x8_t t) | |
91 | { | |
92 | return __builtin_loongson_paddb_u (s, t); | |
93 | } | |
94 | ||
95 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
96 | paddw_s (int32x2_t s, int32x2_t t) | |
97 | { | |
98 | return __builtin_loongson_paddw_s (s, t); | |
99 | } | |
100 | ||
101 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
102 | paddh_s (int16x4_t s, int16x4_t t) | |
103 | { | |
104 | return __builtin_loongson_paddh_s (s, t); | |
105 | } | |
106 | ||
107 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
108 | paddb_s (int8x8_t s, int8x8_t t) | |
109 | { | |
110 | return __builtin_loongson_paddb_s (s, t); | |
111 | } | |
112 | ||
/* Add the unsigned doublewords S and T; overflow wraps around.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
paddd_u (uint64_t s, uint64_t t)
{
  const uint64_t sum = __builtin_loongson_paddd_u (s, t);
  return sum;
}
119 | ||
/* Add the signed doublewords S and T; overflow wraps around.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
paddd_s (int64_t s, int64_t t)
{
  const int64_t sum = __builtin_loongson_paddd_s (s, t);
  return sum;
}
125 | ||
126 | /* Vector addition, treating overflow by signed saturation. */ | |
127 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
128 | paddsh (int16x4_t s, int16x4_t t) | |
129 | { | |
130 | return __builtin_loongson_paddsh (s, t); | |
131 | } | |
132 | ||
133 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
134 | paddsb (int8x8_t s, int8x8_t t) | |
135 | { | |
136 | return __builtin_loongson_paddsb (s, t); | |
137 | } | |
138 | ||
139 | /* Vector addition, treating overflow by unsigned saturation. */ | |
140 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
141 | paddush (uint16x4_t s, uint16x4_t t) | |
142 | { | |
143 | return __builtin_loongson_paddush (s, t); | |
144 | } | |
145 | ||
146 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
147 | paddusb (uint8x8_t s, uint8x8_t t) | |
148 | { | |
149 | return __builtin_loongson_paddusb (s, t); | |
150 | } | |
151 | ||
/* Logical AND NOT of the unsigned doublewords S and T.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
pandn_ud (uint64_t s, uint64_t t)
{
  const uint64_t masked = __builtin_loongson_pandn_ud (s, t);
  return masked;
}
158 | ||
159 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
160 | pandn_uw (uint32x2_t s, uint32x2_t t) | |
161 | { | |
162 | return __builtin_loongson_pandn_uw (s, t); | |
163 | } | |
164 | ||
165 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
166 | pandn_uh (uint16x4_t s, uint16x4_t t) | |
167 | { | |
168 | return __builtin_loongson_pandn_uh (s, t); | |
169 | } | |
170 | ||
171 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
172 | pandn_ub (uint8x8_t s, uint8x8_t t) | |
173 | { | |
174 | return __builtin_loongson_pandn_ub (s, t); | |
175 | } | |
176 | ||
/* Logical AND NOT of the signed doublewords S and T.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
pandn_sd (int64_t s, int64_t t)
{
  const int64_t masked = __builtin_loongson_pandn_sd (s, t);
  return masked;
}
182 | ||
183 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
184 | pandn_sw (int32x2_t s, int32x2_t t) | |
185 | { | |
186 | return __builtin_loongson_pandn_sw (s, t); | |
187 | } | |
188 | ||
189 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
190 | pandn_sh (int16x4_t s, int16x4_t t) | |
191 | { | |
192 | return __builtin_loongson_pandn_sh (s, t); | |
193 | } | |
194 | ||
195 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
196 | pandn_sb (int8x8_t s, int8x8_t t) | |
197 | { | |
198 | return __builtin_loongson_pandn_sb (s, t); | |
199 | } | |
200 | ||
201 | /* Average. */ | |
202 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
203 | pavgh (uint16x4_t s, uint16x4_t t) | |
204 | { | |
205 | return __builtin_loongson_pavgh (s, t); | |
206 | } | |
207 | ||
208 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
209 | pavgb (uint8x8_t s, uint8x8_t t) | |
210 | { | |
211 | return __builtin_loongson_pavgb (s, t); | |
212 | } | |
213 | ||
214 | /* Equality test. */ | |
215 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
216 | pcmpeqw_u (uint32x2_t s, uint32x2_t t) | |
217 | { | |
218 | return __builtin_loongson_pcmpeqw_u (s, t); | |
219 | } | |
220 | ||
221 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
222 | pcmpeqh_u (uint16x4_t s, uint16x4_t t) | |
223 | { | |
224 | return __builtin_loongson_pcmpeqh_u (s, t); | |
225 | } | |
226 | ||
227 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
228 | pcmpeqb_u (uint8x8_t s, uint8x8_t t) | |
229 | { | |
230 | return __builtin_loongson_pcmpeqb_u (s, t); | |
231 | } | |
232 | ||
233 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
234 | pcmpeqw_s (int32x2_t s, int32x2_t t) | |
235 | { | |
236 | return __builtin_loongson_pcmpeqw_s (s, t); | |
237 | } | |
238 | ||
239 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
240 | pcmpeqh_s (int16x4_t s, int16x4_t t) | |
241 | { | |
242 | return __builtin_loongson_pcmpeqh_s (s, t); | |
243 | } | |
244 | ||
245 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
246 | pcmpeqb_s (int8x8_t s, int8x8_t t) | |
247 | { | |
248 | return __builtin_loongson_pcmpeqb_s (s, t); | |
249 | } | |
250 | ||
251 | /* Greater-than test. */ | |
252 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
253 | pcmpgtw_u (uint32x2_t s, uint32x2_t t) | |
254 | { | |
255 | return __builtin_loongson_pcmpgtw_u (s, t); | |
256 | } | |
257 | ||
258 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
259 | pcmpgth_u (uint16x4_t s, uint16x4_t t) | |
260 | { | |
261 | return __builtin_loongson_pcmpgth_u (s, t); | |
262 | } | |
263 | ||
264 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
265 | pcmpgtb_u (uint8x8_t s, uint8x8_t t) | |
266 | { | |
267 | return __builtin_loongson_pcmpgtb_u (s, t); | |
268 | } | |
269 | ||
270 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
271 | pcmpgtw_s (int32x2_t s, int32x2_t t) | |
272 | { | |
273 | return __builtin_loongson_pcmpgtw_s (s, t); | |
274 | } | |
275 | ||
276 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
277 | pcmpgth_s (int16x4_t s, int16x4_t t) | |
278 | { | |
279 | return __builtin_loongson_pcmpgth_s (s, t); | |
280 | } | |
281 | ||
282 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
283 | pcmpgtb_s (int8x8_t s, int8x8_t t) | |
284 | { | |
285 | return __builtin_loongson_pcmpgtb_s (s, t); | |
286 | } | |
287 | ||
288 | /* Extract halfword. */ | |
289 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
290 | pextrh_u (uint16x4_t s, int field /* 0--3. */) | |
291 | { | |
292 | return __builtin_loongson_pextrh_u (s, field); | |
293 | } | |
294 | ||
295 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
296 | pextrh_s (int16x4_t s, int field /* 0--3. */) | |
297 | { | |
298 | return __builtin_loongson_pextrh_s (s, field); | |
299 | } | |
300 | ||
301 | /* Insert halfword. */ | |
302 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
303 | pinsrh_0_u (uint16x4_t s, uint16x4_t t) | |
304 | { | |
305 | return __builtin_loongson_pinsrh_0_u (s, t); | |
306 | } | |
307 | ||
308 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
309 | pinsrh_1_u (uint16x4_t s, uint16x4_t t) | |
310 | { | |
311 | return __builtin_loongson_pinsrh_1_u (s, t); | |
312 | } | |
313 | ||
314 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
315 | pinsrh_2_u (uint16x4_t s, uint16x4_t t) | |
316 | { | |
317 | return __builtin_loongson_pinsrh_2_u (s, t); | |
318 | } | |
319 | ||
320 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
321 | pinsrh_3_u (uint16x4_t s, uint16x4_t t) | |
322 | { | |
323 | return __builtin_loongson_pinsrh_3_u (s, t); | |
324 | } | |
325 | ||
326 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
327 | pinsrh_0_s (int16x4_t s, int16x4_t t) | |
328 | { | |
329 | return __builtin_loongson_pinsrh_0_s (s, t); | |
330 | } | |
331 | ||
332 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
333 | pinsrh_1_s (int16x4_t s, int16x4_t t) | |
334 | { | |
335 | return __builtin_loongson_pinsrh_1_s (s, t); | |
336 | } | |
337 | ||
338 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
339 | pinsrh_2_s (int16x4_t s, int16x4_t t) | |
340 | { | |
341 | return __builtin_loongson_pinsrh_2_s (s, t); | |
342 | } | |
343 | ||
344 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
345 | pinsrh_3_s (int16x4_t s, int16x4_t t) | |
346 | { | |
347 | return __builtin_loongson_pinsrh_3_s (s, t); | |
348 | } | |
349 | ||
350 | /* Multiply and add. */ | |
351 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
352 | pmaddhw (int16x4_t s, int16x4_t t) | |
353 | { | |
354 | return __builtin_loongson_pmaddhw (s, t); | |
355 | } | |
356 | ||
357 | /* Maximum of signed halfwords. */ | |
358 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
359 | pmaxsh (int16x4_t s, int16x4_t t) | |
360 | { | |
361 | return __builtin_loongson_pmaxsh (s, t); | |
362 | } | |
363 | ||
364 | /* Maximum of unsigned bytes. */ | |
365 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
366 | pmaxub (uint8x8_t s, uint8x8_t t) | |
367 | { | |
368 | return __builtin_loongson_pmaxub (s, t); | |
369 | } | |
370 | ||
371 | /* Minimum of signed halfwords. */ | |
372 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
373 | pminsh (int16x4_t s, int16x4_t t) | |
374 | { | |
375 | return __builtin_loongson_pminsh (s, t); | |
376 | } | |
377 | ||
378 | /* Minimum of unsigned bytes. */ | |
379 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
380 | pminub (uint8x8_t s, uint8x8_t t) | |
381 | { | |
382 | return __builtin_loongson_pminub (s, t); | |
383 | } | |
384 | ||
385 | /* Move byte mask. */ | |
386 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
387 | pmovmskb_u (uint8x8_t s) | |
388 | { | |
389 | return __builtin_loongson_pmovmskb_u (s); | |
390 | } | |
391 | ||
392 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
393 | pmovmskb_s (int8x8_t s) | |
394 | { | |
395 | return __builtin_loongson_pmovmskb_s (s); | |
396 | } | |
397 | ||
398 | /* Multiply unsigned integers and store high result. */ | |
399 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
400 | pmulhuh (uint16x4_t s, uint16x4_t t) | |
401 | { | |
402 | return __builtin_loongson_pmulhuh (s, t); | |
403 | } | |
404 | ||
405 | /* Multiply signed integers and store high result. */ | |
406 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
407 | pmulhh (int16x4_t s, int16x4_t t) | |
408 | { | |
409 | return __builtin_loongson_pmulhh (s, t); | |
410 | } | |
411 | ||
412 | /* Multiply signed integers and store low result. */ | |
413 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
414 | pmullh (int16x4_t s, int16x4_t t) | |
415 | { | |
416 | return __builtin_loongson_pmullh (s, t); | |
417 | } | |
418 | ||
419 | /* Multiply unsigned word integers. */ | |
420 | __extension__ static __inline int64_t __attribute__ ((__always_inline__)) | |
421 | pmuluw (uint32x2_t s, uint32x2_t t) | |
422 | { | |
423 | return __builtin_loongson_pmuluw (s, t); | |
424 | } | |
425 | ||
426 | /* Absolute difference. */ | |
427 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
428 | pasubub (uint8x8_t s, uint8x8_t t) | |
429 | { | |
430 | return __builtin_loongson_pasubub (s, t); | |
431 | } | |
432 | ||
433 | /* Sum of unsigned byte integers. */ | |
434 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
435 | biadd (uint8x8_t s) | |
436 | { | |
437 | return __builtin_loongson_biadd (s); | |
438 | } | |
439 | ||
440 | /* Sum of absolute differences. | |
441 | Note that this intrinsic expands into two machine instructions: | |
442 | PASUBUB followed by BIADD. */ | |
443 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
444 | psadbh (uint8x8_t s, uint8x8_t t) | |
445 | { | |
446 | return __builtin_loongson_psadbh (s, t); | |
447 | } | |
448 | ||
449 | /* Shuffle halfwords. */ | |
450 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
451 | pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order) | |
452 | { | |
453 | return __builtin_loongson_pshufh_u (s, order); | |
454 | } | |
455 | ||
456 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
457 | pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order) | |
458 | { | |
459 | return __builtin_loongson_pshufh_s (s, order); | |
460 | } | |
461 | ||
462 | /* Shift left logical. */ | |
463 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
464 | psllh_u (uint16x4_t s, uint8_t amount) | |
465 | { | |
466 | return __builtin_loongson_psllh_u (s, amount); | |
467 | } | |
468 | ||
469 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
470 | psllh_s (int16x4_t s, uint8_t amount) | |
471 | { | |
472 | return __builtin_loongson_psllh_s (s, amount); | |
473 | } | |
474 | ||
475 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
476 | psllw_u (uint32x2_t s, uint8_t amount) | |
477 | { | |
478 | return __builtin_loongson_psllw_u (s, amount); | |
479 | } | |
480 | ||
481 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
482 | psllw_s (int32x2_t s, uint8_t amount) | |
483 | { | |
484 | return __builtin_loongson_psllw_s (s, amount); | |
485 | } | |
486 | ||
487 | /* Shift right logical. */ | |
488 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
489 | psrlh_u (uint16x4_t s, uint8_t amount) | |
490 | { | |
491 | return __builtin_loongson_psrlh_u (s, amount); | |
492 | } | |
493 | ||
494 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
495 | psrlh_s (int16x4_t s, uint8_t amount) | |
496 | { | |
497 | return __builtin_loongson_psrlh_s (s, amount); | |
498 | } | |
499 | ||
500 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
501 | psrlw_u (uint32x2_t s, uint8_t amount) | |
502 | { | |
503 | return __builtin_loongson_psrlw_u (s, amount); | |
504 | } | |
505 | ||
506 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
507 | psrlw_s (int32x2_t s, uint8_t amount) | |
508 | { | |
509 | return __builtin_loongson_psrlw_s (s, amount); | |
510 | } | |
511 | ||
512 | /* Shift right arithmetic. */ | |
513 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
514 | psrah_u (uint16x4_t s, uint8_t amount) | |
515 | { | |
516 | return __builtin_loongson_psrah_u (s, amount); | |
517 | } | |
518 | ||
519 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
520 | psrah_s (int16x4_t s, uint8_t amount) | |
521 | { | |
522 | return __builtin_loongson_psrah_s (s, amount); | |
523 | } | |
524 | ||
525 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
526 | psraw_u (uint32x2_t s, uint8_t amount) | |
527 | { | |
528 | return __builtin_loongson_psraw_u (s, amount); | |
529 | } | |
530 | ||
531 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
532 | psraw_s (int32x2_t s, uint8_t amount) | |
533 | { | |
534 | return __builtin_loongson_psraw_s (s, amount); | |
535 | } | |
536 | ||
537 | /* Vector subtraction, treating overflow by wraparound. */ | |
538 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
539 | psubw_u (uint32x2_t s, uint32x2_t t) | |
540 | { | |
541 | return __builtin_loongson_psubw_u (s, t); | |
542 | } | |
543 | ||
544 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
545 | psubh_u (uint16x4_t s, uint16x4_t t) | |
546 | { | |
547 | return __builtin_loongson_psubh_u (s, t); | |
548 | } | |
549 | ||
550 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
551 | psubb_u (uint8x8_t s, uint8x8_t t) | |
552 | { | |
553 | return __builtin_loongson_psubb_u (s, t); | |
554 | } | |
555 | ||
556 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
557 | psubw_s (int32x2_t s, int32x2_t t) | |
558 | { | |
559 | return __builtin_loongson_psubw_s (s, t); | |
560 | } | |
561 | ||
562 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
563 | psubh_s (int16x4_t s, int16x4_t t) | |
564 | { | |
565 | return __builtin_loongson_psubh_s (s, t); | |
566 | } | |
567 | ||
568 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
569 | psubb_s (int8x8_t s, int8x8_t t) | |
570 | { | |
571 | return __builtin_loongson_psubb_s (s, t); | |
572 | } | |
573 | ||
/* Subtract on the unsigned doublewords S and T; overflow wraps
   around.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
psubd_u (uint64_t s, uint64_t t)
{
  const uint64_t difference = __builtin_loongson_psubd_u (s, t);
  return difference;
}
580 | ||
/* Subtract on the signed doublewords S and T; overflow wraps
   around.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
psubd_s (int64_t s, int64_t t)
{
  const int64_t difference = __builtin_loongson_psubd_s (s, t);
  return difference;
}
586 | ||
587 | /* Vector subtraction, treating overflow by signed saturation. */ | |
588 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
589 | psubsh (int16x4_t s, int16x4_t t) | |
590 | { | |
591 | return __builtin_loongson_psubsh (s, t); | |
592 | } | |
593 | ||
594 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
595 | psubsb (int8x8_t s, int8x8_t t) | |
596 | { | |
597 | return __builtin_loongson_psubsb (s, t); | |
598 | } | |
599 | ||
600 | /* Vector subtraction, treating overflow by unsigned saturation. */ | |
601 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
602 | psubush (uint16x4_t s, uint16x4_t t) | |
603 | { | |
604 | return __builtin_loongson_psubush (s, t); | |
605 | } | |
606 | ||
607 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
608 | psubusb (uint8x8_t s, uint8x8_t t) | |
609 | { | |
610 | return __builtin_loongson_psubusb (s, t); | |
611 | } | |
612 | ||
613 | /* Unpack high data. */ | |
614 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
615 | punpckhwd_u (uint32x2_t s, uint32x2_t t) | |
616 | { | |
617 | return __builtin_loongson_punpckhwd_u (s, t); | |
618 | } | |
619 | ||
620 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
621 | punpckhhw_u (uint16x4_t s, uint16x4_t t) | |
622 | { | |
623 | return __builtin_loongson_punpckhhw_u (s, t); | |
624 | } | |
625 | ||
626 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
627 | punpckhbh_u (uint8x8_t s, uint8x8_t t) | |
628 | { | |
629 | return __builtin_loongson_punpckhbh_u (s, t); | |
630 | } | |
631 | ||
632 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
633 | punpckhwd_s (int32x2_t s, int32x2_t t) | |
634 | { | |
635 | return __builtin_loongson_punpckhwd_s (s, t); | |
636 | } | |
637 | ||
638 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
639 | punpckhhw_s (int16x4_t s, int16x4_t t) | |
640 | { | |
641 | return __builtin_loongson_punpckhhw_s (s, t); | |
642 | } | |
643 | ||
644 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
645 | punpckhbh_s (int8x8_t s, int8x8_t t) | |
646 | { | |
647 | return __builtin_loongson_punpckhbh_s (s, t); | |
648 | } | |
649 | ||
650 | /* Unpack low data. */ | |
651 | __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
652 | punpcklwd_u (uint32x2_t s, uint32x2_t t) | |
653 | { | |
654 | return __builtin_loongson_punpcklwd_u (s, t); | |
655 | } | |
656 | ||
657 | __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
658 | punpcklhw_u (uint16x4_t s, uint16x4_t t) | |
659 | { | |
660 | return __builtin_loongson_punpcklhw_u (s, t); | |
661 | } | |
662 | ||
663 | __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
664 | punpcklbh_u (uint8x8_t s, uint8x8_t t) | |
665 | { | |
666 | return __builtin_loongson_punpcklbh_u (s, t); | |
667 | } | |
668 | ||
669 | __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
670 | punpcklwd_s (int32x2_t s, int32x2_t t) | |
671 | { | |
672 | return __builtin_loongson_punpcklwd_s (s, t); | |
673 | } | |
674 | ||
675 | __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
676 | punpcklhw_s (int16x4_t s, int16x4_t t) | |
677 | { | |
678 | return __builtin_loongson_punpcklhw_s (s, t); | |
679 | } | |
680 | ||
681 | __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
682 | punpcklbh_s (int8x8_t s, int8x8_t t) | |
683 | { | |
684 | return __builtin_loongson_punpcklbh_s (s, t); | |
685 | } | |
686 | ||
687 | #ifdef __cplusplus | |
688 | } | |
689 | #endif | |
690 | ||
691 | #endif |