]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/arm_neon.h
Update copyright years in gcc/
[thirdparty/gcc.git] / gcc / config / aarch64 / arm_neon.h
1 /* ARM NEON intrinsics include file.
2
3 Copyright (C) 2011-2014 Free Software Foundation, Inc.
4 Contributed by ARM Ltd.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
29
30 #include <stdint.h>
31
32 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
33 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
34
35 typedef __builtin_aarch64_simd_qi int8x8_t
36 __attribute__ ((__vector_size__ (8)));
37 typedef __builtin_aarch64_simd_hi int16x4_t
38 __attribute__ ((__vector_size__ (8)));
39 typedef __builtin_aarch64_simd_si int32x2_t
40 __attribute__ ((__vector_size__ (8)));
41 typedef int64_t int64x1_t;
42 typedef int32_t int32x1_t;
43 typedef int16_t int16x1_t;
44 typedef int8_t int8x1_t;
45 typedef double float64x1_t;
46 typedef __builtin_aarch64_simd_sf float32x2_t
47 __attribute__ ((__vector_size__ (8)));
48 typedef __builtin_aarch64_simd_poly8 poly8x8_t
49 __attribute__ ((__vector_size__ (8)));
50 typedef __builtin_aarch64_simd_poly16 poly16x4_t
51 __attribute__ ((__vector_size__ (8)));
52 typedef __builtin_aarch64_simd_uqi uint8x8_t
53 __attribute__ ((__vector_size__ (8)));
54 typedef __builtin_aarch64_simd_uhi uint16x4_t
55 __attribute__ ((__vector_size__ (8)));
56 typedef __builtin_aarch64_simd_usi uint32x2_t
57 __attribute__ ((__vector_size__ (8)));
58 typedef uint64_t uint64x1_t;
59 typedef uint32_t uint32x1_t;
60 typedef uint16_t uint16x1_t;
61 typedef uint8_t uint8x1_t;
62 typedef __builtin_aarch64_simd_qi int8x16_t
63 __attribute__ ((__vector_size__ (16)));
64 typedef __builtin_aarch64_simd_hi int16x8_t
65 __attribute__ ((__vector_size__ (16)));
66 typedef __builtin_aarch64_simd_si int32x4_t
67 __attribute__ ((__vector_size__ (16)));
68 typedef __builtin_aarch64_simd_di int64x2_t
69 __attribute__ ((__vector_size__ (16)));
70 typedef __builtin_aarch64_simd_sf float32x4_t
71 __attribute__ ((__vector_size__ (16)));
72 typedef __builtin_aarch64_simd_df float64x2_t
73 __attribute__ ((__vector_size__ (16)));
74 typedef __builtin_aarch64_simd_poly8 poly8x16_t
75 __attribute__ ((__vector_size__ (16)));
76 typedef __builtin_aarch64_simd_poly16 poly16x8_t
77 __attribute__ ((__vector_size__ (16)));
78 typedef __builtin_aarch64_simd_poly64 poly64x2_t
79 __attribute__ ((__vector_size__ (16)));
80 typedef __builtin_aarch64_simd_uqi uint8x16_t
81 __attribute__ ((__vector_size__ (16)));
82 typedef __builtin_aarch64_simd_uhi uint16x8_t
83 __attribute__ ((__vector_size__ (16)));
84 typedef __builtin_aarch64_simd_usi uint32x4_t
85 __attribute__ ((__vector_size__ (16)));
86 typedef __builtin_aarch64_simd_udi uint64x2_t
87 __attribute__ ((__vector_size__ (16)));
88
89 typedef float float32_t;
90 typedef double float64_t;
91 typedef __builtin_aarch64_simd_poly8 poly8_t;
92 typedef __builtin_aarch64_simd_poly16 poly16_t;
93 typedef __builtin_aarch64_simd_poly64 poly64_t;
94 typedef __builtin_aarch64_simd_poly128 poly128_t;
95
96 typedef struct int8x8x2_t
97 {
98 int8x8_t val[2];
99 } int8x8x2_t;
100
101 typedef struct int8x16x2_t
102 {
103 int8x16_t val[2];
104 } int8x16x2_t;
105
106 typedef struct int16x4x2_t
107 {
108 int16x4_t val[2];
109 } int16x4x2_t;
110
111 typedef struct int16x8x2_t
112 {
113 int16x8_t val[2];
114 } int16x8x2_t;
115
116 typedef struct int32x2x2_t
117 {
118 int32x2_t val[2];
119 } int32x2x2_t;
120
121 typedef struct int32x4x2_t
122 {
123 int32x4_t val[2];
124 } int32x4x2_t;
125
126 typedef struct int64x1x2_t
127 {
128 int64x1_t val[2];
129 } int64x1x2_t;
130
131 typedef struct int64x2x2_t
132 {
133 int64x2_t val[2];
134 } int64x2x2_t;
135
136 typedef struct uint8x8x2_t
137 {
138 uint8x8_t val[2];
139 } uint8x8x2_t;
140
141 typedef struct uint8x16x2_t
142 {
143 uint8x16_t val[2];
144 } uint8x16x2_t;
145
146 typedef struct uint16x4x2_t
147 {
148 uint16x4_t val[2];
149 } uint16x4x2_t;
150
151 typedef struct uint16x8x2_t
152 {
153 uint16x8_t val[2];
154 } uint16x8x2_t;
155
156 typedef struct uint32x2x2_t
157 {
158 uint32x2_t val[2];
159 } uint32x2x2_t;
160
161 typedef struct uint32x4x2_t
162 {
163 uint32x4_t val[2];
164 } uint32x4x2_t;
165
166 typedef struct uint64x1x2_t
167 {
168 uint64x1_t val[2];
169 } uint64x1x2_t;
170
171 typedef struct uint64x2x2_t
172 {
173 uint64x2_t val[2];
174 } uint64x2x2_t;
175
176 typedef struct float32x2x2_t
177 {
178 float32x2_t val[2];
179 } float32x2x2_t;
180
181 typedef struct float32x4x2_t
182 {
183 float32x4_t val[2];
184 } float32x4x2_t;
185
186 typedef struct float64x2x2_t
187 {
188 float64x2_t val[2];
189 } float64x2x2_t;
190
191 typedef struct float64x1x2_t
192 {
193 float64x1_t val[2];
194 } float64x1x2_t;
195
196 typedef struct poly8x8x2_t
197 {
198 poly8x8_t val[2];
199 } poly8x8x2_t;
200
201 typedef struct poly8x16x2_t
202 {
203 poly8x16_t val[2];
204 } poly8x16x2_t;
205
206 typedef struct poly16x4x2_t
207 {
208 poly16x4_t val[2];
209 } poly16x4x2_t;
210
211 typedef struct poly16x8x2_t
212 {
213 poly16x8_t val[2];
214 } poly16x8x2_t;
215
216 typedef struct int8x8x3_t
217 {
218 int8x8_t val[3];
219 } int8x8x3_t;
220
221 typedef struct int8x16x3_t
222 {
223 int8x16_t val[3];
224 } int8x16x3_t;
225
226 typedef struct int16x4x3_t
227 {
228 int16x4_t val[3];
229 } int16x4x3_t;
230
231 typedef struct int16x8x3_t
232 {
233 int16x8_t val[3];
234 } int16x8x3_t;
235
236 typedef struct int32x2x3_t
237 {
238 int32x2_t val[3];
239 } int32x2x3_t;
240
241 typedef struct int32x4x3_t
242 {
243 int32x4_t val[3];
244 } int32x4x3_t;
245
246 typedef struct int64x1x3_t
247 {
248 int64x1_t val[3];
249 } int64x1x3_t;
250
251 typedef struct int64x2x3_t
252 {
253 int64x2_t val[3];
254 } int64x2x3_t;
255
256 typedef struct uint8x8x3_t
257 {
258 uint8x8_t val[3];
259 } uint8x8x3_t;
260
261 typedef struct uint8x16x3_t
262 {
263 uint8x16_t val[3];
264 } uint8x16x3_t;
265
266 typedef struct uint16x4x3_t
267 {
268 uint16x4_t val[3];
269 } uint16x4x3_t;
270
271 typedef struct uint16x8x3_t
272 {
273 uint16x8_t val[3];
274 } uint16x8x3_t;
275
276 typedef struct uint32x2x3_t
277 {
278 uint32x2_t val[3];
279 } uint32x2x3_t;
280
281 typedef struct uint32x4x3_t
282 {
283 uint32x4_t val[3];
284 } uint32x4x3_t;
285
286 typedef struct uint64x1x3_t
287 {
288 uint64x1_t val[3];
289 } uint64x1x3_t;
290
291 typedef struct uint64x2x3_t
292 {
293 uint64x2_t val[3];
294 } uint64x2x3_t;
295
296 typedef struct float32x2x3_t
297 {
298 float32x2_t val[3];
299 } float32x2x3_t;
300
301 typedef struct float32x4x3_t
302 {
303 float32x4_t val[3];
304 } float32x4x3_t;
305
306 typedef struct float64x2x3_t
307 {
308 float64x2_t val[3];
309 } float64x2x3_t;
310
311 typedef struct float64x1x3_t
312 {
313 float64x1_t val[3];
314 } float64x1x3_t;
315
316 typedef struct poly8x8x3_t
317 {
318 poly8x8_t val[3];
319 } poly8x8x3_t;
320
321 typedef struct poly8x16x3_t
322 {
323 poly8x16_t val[3];
324 } poly8x16x3_t;
325
326 typedef struct poly16x4x3_t
327 {
328 poly16x4_t val[3];
329 } poly16x4x3_t;
330
331 typedef struct poly16x8x3_t
332 {
333 poly16x8_t val[3];
334 } poly16x8x3_t;
335
336 typedef struct int8x8x4_t
337 {
338 int8x8_t val[4];
339 } int8x8x4_t;
340
341 typedef struct int8x16x4_t
342 {
343 int8x16_t val[4];
344 } int8x16x4_t;
345
346 typedef struct int16x4x4_t
347 {
348 int16x4_t val[4];
349 } int16x4x4_t;
350
351 typedef struct int16x8x4_t
352 {
353 int16x8_t val[4];
354 } int16x8x4_t;
355
356 typedef struct int32x2x4_t
357 {
358 int32x2_t val[4];
359 } int32x2x4_t;
360
361 typedef struct int32x4x4_t
362 {
363 int32x4_t val[4];
364 } int32x4x4_t;
365
366 typedef struct int64x1x4_t
367 {
368 int64x1_t val[4];
369 } int64x1x4_t;
370
371 typedef struct int64x2x4_t
372 {
373 int64x2_t val[4];
374 } int64x2x4_t;
375
376 typedef struct uint8x8x4_t
377 {
378 uint8x8_t val[4];
379 } uint8x8x4_t;
380
381 typedef struct uint8x16x4_t
382 {
383 uint8x16_t val[4];
384 } uint8x16x4_t;
385
386 typedef struct uint16x4x4_t
387 {
388 uint16x4_t val[4];
389 } uint16x4x4_t;
390
391 typedef struct uint16x8x4_t
392 {
393 uint16x8_t val[4];
394 } uint16x8x4_t;
395
396 typedef struct uint32x2x4_t
397 {
398 uint32x2_t val[4];
399 } uint32x2x4_t;
400
401 typedef struct uint32x4x4_t
402 {
403 uint32x4_t val[4];
404 } uint32x4x4_t;
405
406 typedef struct uint64x1x4_t
407 {
408 uint64x1_t val[4];
409 } uint64x1x4_t;
410
411 typedef struct uint64x2x4_t
412 {
413 uint64x2_t val[4];
414 } uint64x2x4_t;
415
416 typedef struct float32x2x4_t
417 {
418 float32x2_t val[4];
419 } float32x2x4_t;
420
421 typedef struct float32x4x4_t
422 {
423 float32x4_t val[4];
424 } float32x4x4_t;
425
426 typedef struct float64x2x4_t
427 {
428 float64x2_t val[4];
429 } float64x2x4_t;
430
431 typedef struct float64x1x4_t
432 {
433 float64x1_t val[4];
434 } float64x1x4_t;
435
436 typedef struct poly8x8x4_t
437 {
438 poly8x8_t val[4];
439 } poly8x8x4_t;
440
441 typedef struct poly8x16x4_t
442 {
443 poly8x16_t val[4];
444 } poly8x16x4_t;
445
446 typedef struct poly16x4x4_t
447 {
448 poly16x4_t val[4];
449 } poly16x4x4_t;
450
451 typedef struct poly16x8x4_t
452 {
453 poly16x8_t val[4];
454 } poly16x8x4_t;
455
456 /* vget_lane internal macros. */
457
458 #define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
459 (__cast_ret \
460 __builtin_aarch64_get_lane##__size (__cast_a __a, __b))
461
462 #define __aarch64_vget_lane_f32(__a, __b) \
463 __aarch64_vget_lane_any (v2sf, , , __a, __b)
464 #define __aarch64_vget_lane_f64(__a, __b) (__a)
465
466 #define __aarch64_vget_lane_p8(__a, __b) \
467 __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
468 #define __aarch64_vget_lane_p16(__a, __b) \
469 __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)
470
471 #define __aarch64_vget_lane_s8(__a, __b) \
472 __aarch64_vget_lane_any (v8qi, , ,__a, __b)
473 #define __aarch64_vget_lane_s16(__a, __b) \
474 __aarch64_vget_lane_any (v4hi, , ,__a, __b)
475 #define __aarch64_vget_lane_s32(__a, __b) \
476 __aarch64_vget_lane_any (v2si, , ,__a, __b)
477 #define __aarch64_vget_lane_s64(__a, __b) (__a)
478
479 #define __aarch64_vget_lane_u8(__a, __b) \
480 __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
481 #define __aarch64_vget_lane_u16(__a, __b) \
482 __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
483 #define __aarch64_vget_lane_u32(__a, __b) \
484 __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
485 #define __aarch64_vget_lane_u64(__a, __b) (__a)
486
487 #define __aarch64_vgetq_lane_f32(__a, __b) \
488 __aarch64_vget_lane_any (v4sf, , , __a, __b)
489 #define __aarch64_vgetq_lane_f64(__a, __b) \
490 __aarch64_vget_lane_any (v2df, , , __a, __b)
491
492 #define __aarch64_vgetq_lane_p8(__a, __b) \
493 __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
494 #define __aarch64_vgetq_lane_p16(__a, __b) \
495 __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)
496
497 #define __aarch64_vgetq_lane_s8(__a, __b) \
498 __aarch64_vget_lane_any (v16qi, , ,__a, __b)
499 #define __aarch64_vgetq_lane_s16(__a, __b) \
500 __aarch64_vget_lane_any (v8hi, , ,__a, __b)
501 #define __aarch64_vgetq_lane_s32(__a, __b) \
502 __aarch64_vget_lane_any (v4si, , ,__a, __b)
503 #define __aarch64_vgetq_lane_s64(__a, __b) \
504 __aarch64_vget_lane_any (v2di, , ,__a, __b)
505
506 #define __aarch64_vgetq_lane_u8(__a, __b) \
507 __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
508 #define __aarch64_vgetq_lane_u16(__a, __b) \
509 __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
510 #define __aarch64_vgetq_lane_u32(__a, __b) \
511 __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
512 #define __aarch64_vgetq_lane_u64(__a, __b) \
513 __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
514
515 /* __aarch64_vdup_lane internal macros. */
516 #define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
517 vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))
518
519 #define __aarch64_vdup_lane_f32(__a, __b) \
520 __aarch64_vdup_lane_any (f32, , , __a, __b)
521 #define __aarch64_vdup_lane_f64(__a, __b) (__a)
522 #define __aarch64_vdup_lane_p8(__a, __b) \
523 __aarch64_vdup_lane_any (p8, , , __a, __b)
524 #define __aarch64_vdup_lane_p16(__a, __b) \
525 __aarch64_vdup_lane_any (p16, , , __a, __b)
526 #define __aarch64_vdup_lane_s8(__a, __b) \
527 __aarch64_vdup_lane_any (s8, , , __a, __b)
528 #define __aarch64_vdup_lane_s16(__a, __b) \
529 __aarch64_vdup_lane_any (s16, , , __a, __b)
530 #define __aarch64_vdup_lane_s32(__a, __b) \
531 __aarch64_vdup_lane_any (s32, , , __a, __b)
532 #define __aarch64_vdup_lane_s64(__a, __b) (__a)
533 #define __aarch64_vdup_lane_u8(__a, __b) \
534 __aarch64_vdup_lane_any (u8, , , __a, __b)
535 #define __aarch64_vdup_lane_u16(__a, __b) \
536 __aarch64_vdup_lane_any (u16, , , __a, __b)
537 #define __aarch64_vdup_lane_u32(__a, __b) \
538 __aarch64_vdup_lane_any (u32, , , __a, __b)
539 #define __aarch64_vdup_lane_u64(__a, __b) (__a)
540
541 /* __aarch64_vdup_laneq internal macros. */
542 #define __aarch64_vdup_laneq_f32(__a, __b) \
543 __aarch64_vdup_lane_any (f32, , q, __a, __b)
544 #define __aarch64_vdup_laneq_f64(__a, __b) \
545 __aarch64_vdup_lane_any (f64, , q, __a, __b)
546 #define __aarch64_vdup_laneq_p8(__a, __b) \
547 __aarch64_vdup_lane_any (p8, , q, __a, __b)
548 #define __aarch64_vdup_laneq_p16(__a, __b) \
549 __aarch64_vdup_lane_any (p16, , q, __a, __b)
550 #define __aarch64_vdup_laneq_s8(__a, __b) \
551 __aarch64_vdup_lane_any (s8, , q, __a, __b)
552 #define __aarch64_vdup_laneq_s16(__a, __b) \
553 __aarch64_vdup_lane_any (s16, , q, __a, __b)
554 #define __aarch64_vdup_laneq_s32(__a, __b) \
555 __aarch64_vdup_lane_any (s32, , q, __a, __b)
556 #define __aarch64_vdup_laneq_s64(__a, __b) \
557 __aarch64_vdup_lane_any (s64, , q, __a, __b)
558 #define __aarch64_vdup_laneq_u8(__a, __b) \
559 __aarch64_vdup_lane_any (u8, , q, __a, __b)
560 #define __aarch64_vdup_laneq_u16(__a, __b) \
561 __aarch64_vdup_lane_any (u16, , q, __a, __b)
562 #define __aarch64_vdup_laneq_u32(__a, __b) \
563 __aarch64_vdup_lane_any (u32, , q, __a, __b)
564 #define __aarch64_vdup_laneq_u64(__a, __b) \
565 __aarch64_vdup_lane_any (u64, , q, __a, __b)
566
567 /* __aarch64_vdupq_lane internal macros. */
568 #define __aarch64_vdupq_lane_f32(__a, __b) \
569 __aarch64_vdup_lane_any (f32, q, , __a, __b)
570 #define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a))
571 #define __aarch64_vdupq_lane_p8(__a, __b) \
572 __aarch64_vdup_lane_any (p8, q, , __a, __b)
573 #define __aarch64_vdupq_lane_p16(__a, __b) \
574 __aarch64_vdup_lane_any (p16, q, , __a, __b)
575 #define __aarch64_vdupq_lane_s8(__a, __b) \
576 __aarch64_vdup_lane_any (s8, q, , __a, __b)
577 #define __aarch64_vdupq_lane_s16(__a, __b) \
578 __aarch64_vdup_lane_any (s16, q, , __a, __b)
579 #define __aarch64_vdupq_lane_s32(__a, __b) \
580 __aarch64_vdup_lane_any (s32, q, , __a, __b)
581 #define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a))
582 #define __aarch64_vdupq_lane_u8(__a, __b) \
583 __aarch64_vdup_lane_any (u8, q, , __a, __b)
584 #define __aarch64_vdupq_lane_u16(__a, __b) \
585 __aarch64_vdup_lane_any (u16, q, , __a, __b)
586 #define __aarch64_vdupq_lane_u32(__a, __b) \
587 __aarch64_vdup_lane_any (u32, q, , __a, __b)
588 #define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a))
589
590 /* __aarch64_vdupq_laneq internal macros. */
591 #define __aarch64_vdupq_laneq_f32(__a, __b) \
592 __aarch64_vdup_lane_any (f32, q, q, __a, __b)
593 #define __aarch64_vdupq_laneq_f64(__a, __b) \
594 __aarch64_vdup_lane_any (f64, q, q, __a, __b)
595 #define __aarch64_vdupq_laneq_p8(__a, __b) \
596 __aarch64_vdup_lane_any (p8, q, q, __a, __b)
597 #define __aarch64_vdupq_laneq_p16(__a, __b) \
598 __aarch64_vdup_lane_any (p16, q, q, __a, __b)
599 #define __aarch64_vdupq_laneq_s8(__a, __b) \
600 __aarch64_vdup_lane_any (s8, q, q, __a, __b)
601 #define __aarch64_vdupq_laneq_s16(__a, __b) \
602 __aarch64_vdup_lane_any (s16, q, q, __a, __b)
603 #define __aarch64_vdupq_laneq_s32(__a, __b) \
604 __aarch64_vdup_lane_any (s32, q, q, __a, __b)
605 #define __aarch64_vdupq_laneq_s64(__a, __b) \
606 __aarch64_vdup_lane_any (s64, q, q, __a, __b)
607 #define __aarch64_vdupq_laneq_u8(__a, __b) \
608 __aarch64_vdup_lane_any (u8, q, q, __a, __b)
609 #define __aarch64_vdupq_laneq_u16(__a, __b) \
610 __aarch64_vdup_lane_any (u16, q, q, __a, __b)
611 #define __aarch64_vdupq_laneq_u32(__a, __b) \
612 __aarch64_vdup_lane_any (u32, q, q, __a, __b)
613 #define __aarch64_vdupq_laneq_u64(__a, __b) \
614 __aarch64_vdup_lane_any (u64, q, q, __a, __b)
615
616 /* vadd */
617 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
618 vadd_s8 (int8x8_t __a, int8x8_t __b)
619 {
620 return __a + __b;
621 }
622
623 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
624 vadd_s16 (int16x4_t __a, int16x4_t __b)
625 {
626 return __a + __b;
627 }
628
629 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
630 vadd_s32 (int32x2_t __a, int32x2_t __b)
631 {
632 return __a + __b;
633 }
634
635 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
636 vadd_f32 (float32x2_t __a, float32x2_t __b)
637 {
638 return __a + __b;
639 }
640
641 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
642 vadd_f64 (float64x1_t __a, float64x1_t __b)
643 {
644 return __a + __b;
645 }
646
647 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
648 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
649 {
650 return __a + __b;
651 }
652
653 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
654 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
655 {
656 return __a + __b;
657 }
658
659 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
660 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
661 {
662 return __a + __b;
663 }
664
665 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
666 vadd_s64 (int64x1_t __a, int64x1_t __b)
667 {
668 return __a + __b;
669 }
670
671 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
672 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
673 {
674 return __a + __b;
675 }
676
677 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
678 vaddq_s8 (int8x16_t __a, int8x16_t __b)
679 {
680 return __a + __b;
681 }
682
683 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
684 vaddq_s16 (int16x8_t __a, int16x8_t __b)
685 {
686 return __a + __b;
687 }
688
689 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
690 vaddq_s32 (int32x4_t __a, int32x4_t __b)
691 {
692 return __a + __b;
693 }
694
695 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
696 vaddq_s64 (int64x2_t __a, int64x2_t __b)
697 {
698 return __a + __b;
699 }
700
701 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
702 vaddq_f32 (float32x4_t __a, float32x4_t __b)
703 {
704 return __a + __b;
705 }
706
707 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
708 vaddq_f64 (float64x2_t __a, float64x2_t __b)
709 {
710 return __a + __b;
711 }
712
713 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
714 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
715 {
716 return __a + __b;
717 }
718
719 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
720 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
721 {
722 return __a + __b;
723 }
724
725 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
726 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
727 {
728 return __a + __b;
729 }
730
731 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
732 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
733 {
734 return __a + __b;
735 }
736
737 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
738 vaddl_s8 (int8x8_t __a, int8x8_t __b)
739 {
740 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
741 }
742
743 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
744 vaddl_s16 (int16x4_t __a, int16x4_t __b)
745 {
746 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
747 }
748
749 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
750 vaddl_s32 (int32x2_t __a, int32x2_t __b)
751 {
752 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
753 }
754
755 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
756 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
757 {
758 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
759 (int8x8_t) __b);
760 }
761
762 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
763 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
764 {
765 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
766 (int16x4_t) __b);
767 }
768
769 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
770 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
771 {
772 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
773 (int32x2_t) __b);
774 }
775
776 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
777 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
778 {
779 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
780 }
781
782 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
783 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
784 {
785 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
786 }
787
788 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
789 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
790 {
791 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
792 }
793
794 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
795 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
796 {
797 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
798 (int8x16_t) __b);
799 }
800
801 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
802 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
803 {
804 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
805 (int16x8_t) __b);
806 }
807
808 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
809 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
810 {
811 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
812 (int32x4_t) __b);
813 }
814
815 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
816 vaddw_s8 (int16x8_t __a, int8x8_t __b)
817 {
818 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
819 }
820
821 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
822 vaddw_s16 (int32x4_t __a, int16x4_t __b)
823 {
824 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
825 }
826
827 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
828 vaddw_s32 (int64x2_t __a, int32x2_t __b)
829 {
830 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
831 }
832
833 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
834 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
835 {
836 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
837 (int8x8_t) __b);
838 }
839
840 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
841 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
842 {
843 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
844 (int16x4_t) __b);
845 }
846
847 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
848 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
849 {
850 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
851 (int32x2_t) __b);
852 }
853
854 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
855 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
856 {
857 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
858 }
859
860 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
861 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
862 {
863 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
864 }
865
866 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
867 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
868 {
869 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
870 }
871
872 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
873 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
874 {
875 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
876 (int8x16_t) __b);
877 }
878
879 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
880 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
881 {
882 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
883 (int16x8_t) __b);
884 }
885
886 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
887 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
888 {
889 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
890 (int32x4_t) __b);
891 }
892
893 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
894 vhadd_s8 (int8x8_t __a, int8x8_t __b)
895 {
896 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
897 }
898
899 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
900 vhadd_s16 (int16x4_t __a, int16x4_t __b)
901 {
902 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
903 }
904
905 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
906 vhadd_s32 (int32x2_t __a, int32x2_t __b)
907 {
908 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
909 }
910
911 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
912 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
913 {
914 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
915 (int8x8_t) __b);
916 }
917
918 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
919 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
920 {
921 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
922 (int16x4_t) __b);
923 }
924
925 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
926 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
927 {
928 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
929 (int32x2_t) __b);
930 }
931
932 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
933 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
934 {
935 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
936 }
937
938 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
939 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
940 {
941 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
942 }
943
944 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
945 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
946 {
947 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
948 }
949
950 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
951 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
952 {
953 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
954 (int8x16_t) __b);
955 }
956
957 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
958 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
959 {
960 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
961 (int16x8_t) __b);
962 }
963
964 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
965 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
966 {
967 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
968 (int32x4_t) __b);
969 }
970
971 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
972 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
973 {
974 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
975 }
976
977 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
978 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
979 {
980 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
981 }
982
983 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
984 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
985 {
986 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
987 }
988
989 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
990 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
991 {
992 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
993 (int8x8_t) __b);
994 }
995
996 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
997 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
998 {
999 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
1000 (int16x4_t) __b);
1001 }
1002
1003 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1004 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
1005 {
1006 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
1007 (int32x2_t) __b);
1008 }
1009
1010 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1011 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1012 {
1013 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1014 }
1015
1016 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1017 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1018 {
1019 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1020 }
1021
1022 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1023 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1024 {
1025 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1026 }
1027
1028 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1029 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1030 {
1031 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1032 (int8x16_t) __b);
1033 }
1034
1035 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1036 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1037 {
1038 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1039 (int16x8_t) __b);
1040 }
1041
1042 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1043 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1044 {
1045 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1046 (int32x4_t) __b);
1047 }
1048
1049 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1050 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1051 {
1052 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1053 }
1054
1055 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1056 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1057 {
1058 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1059 }
1060
1061 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1062 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1063 {
1064 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1065 }
1066
1067 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1068 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1069 {
1070 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1071 (int16x8_t) __b);
1072 }
1073
1074 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1075 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1076 {
1077 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1078 (int32x4_t) __b);
1079 }
1080
1081 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1082 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1083 {
1084 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1085 (int64x2_t) __b);
1086 }
1087
1088 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1089 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1090 {
1091 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1092 }
1093
1094 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1095 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1096 {
1097 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1098 }
1099
1100 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1101 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1102 {
1103 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1104 }
1105
1106 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1107 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1108 {
1109 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1110 (int16x8_t) __b);
1111 }
1112
1113 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1114 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1115 {
1116 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1117 (int32x4_t) __b);
1118 }
1119
1120 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1121 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1122 {
1123 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1124 (int64x2_t) __b);
1125 }
1126
1127 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1128 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1129 {
1130 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1131 }
1132
1133 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1134 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1135 {
1136 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1137 }
1138
1139 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1140 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1141 {
1142 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1143 }
1144
1145 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1146 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1147 {
1148 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1149 (int16x8_t) __b,
1150 (int16x8_t) __c);
1151 }
1152
1153 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1154 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1155 {
1156 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1157 (int32x4_t) __b,
1158 (int32x4_t) __c);
1159 }
1160
1161 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1162 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1163 {
1164 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1165 (int64x2_t) __b,
1166 (int64x2_t) __c);
1167 }
1168
1169 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1170 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1171 {
1172 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1173 }
1174
1175 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1176 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1177 {
1178 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1179 }
1180
1181 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1182 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1183 {
1184 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1185 }
1186
1187 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1188 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1189 {
1190 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1191 (int16x8_t) __b,
1192 (int16x8_t) __c);
1193 }
1194
1195 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1196 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1197 {
1198 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1199 (int32x4_t) __b,
1200 (int32x4_t) __c);
1201 }
1202
1203 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1204 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1205 {
1206 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1207 (int64x2_t) __b,
1208 (int64x2_t) __c);
1209 }
1210
1211 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1212 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1213 {
1214 return __a / __b;
1215 }
1216
1217 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1218 vdiv_f64 (float64x1_t __a, float64x1_t __b)
1219 {
1220 return __a / __b;
1221 }
1222
1223 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1224 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1225 {
1226 return __a / __b;
1227 }
1228
1229 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1230 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1231 {
1232 return __a / __b;
1233 }
1234
1235 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1236 vmul_s8 (int8x8_t __a, int8x8_t __b)
1237 {
1238 return __a * __b;
1239 }
1240
1241 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1242 vmul_s16 (int16x4_t __a, int16x4_t __b)
1243 {
1244 return __a * __b;
1245 }
1246
1247 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1248 vmul_s32 (int32x2_t __a, int32x2_t __b)
1249 {
1250 return __a * __b;
1251 }
1252
1253 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1254 vmul_f32 (float32x2_t __a, float32x2_t __b)
1255 {
1256 return __a * __b;
1257 }
1258
1259 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1260 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1261 {
1262 return __a * __b;
1263 }
1264
1265 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1266 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1267 {
1268 return __a * __b;
1269 }
1270
1271 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1272 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1273 {
1274 return __a * __b;
1275 }
1276
1277 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1278 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1279 {
1280 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1281 (int8x8_t) __b);
1282 }
1283
1284 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1285 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1286 {
1287 return __a * __b;
1288 }
1289
1290 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1291 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1292 {
1293 return __a * __b;
1294 }
1295
1296 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1297 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1298 {
1299 return __a * __b;
1300 }
1301
1302 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1303 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1304 {
1305 return __a * __b;
1306 }
1307
1308 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1309 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1310 {
1311 return __a * __b;
1312 }
1313
1314 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1315 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1316 {
1317 return __a * __b;
1318 }
1319
1320 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1321 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1322 {
1323 return __a * __b;
1324 }
1325
1326 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1327 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1328 {
1329 return __a * __b;
1330 }
1331
1332 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1333 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1334 {
1335 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1336 (int8x16_t) __b);
1337 }
1338
1339 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1340 vand_s8 (int8x8_t __a, int8x8_t __b)
1341 {
1342 return __a & __b;
1343 }
1344
1345 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1346 vand_s16 (int16x4_t __a, int16x4_t __b)
1347 {
1348 return __a & __b;
1349 }
1350
1351 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1352 vand_s32 (int32x2_t __a, int32x2_t __b)
1353 {
1354 return __a & __b;
1355 }
1356
1357 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1358 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1359 {
1360 return __a & __b;
1361 }
1362
1363 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1364 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1365 {
1366 return __a & __b;
1367 }
1368
1369 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1370 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1371 {
1372 return __a & __b;
1373 }
1374
1375 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1376 vand_s64 (int64x1_t __a, int64x1_t __b)
1377 {
1378 return __a & __b;
1379 }
1380
1381 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1382 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1383 {
1384 return __a & __b;
1385 }
1386
1387 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1388 vandq_s8 (int8x16_t __a, int8x16_t __b)
1389 {
1390 return __a & __b;
1391 }
1392
1393 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1394 vandq_s16 (int16x8_t __a, int16x8_t __b)
1395 {
1396 return __a & __b;
1397 }
1398
1399 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1400 vandq_s32 (int32x4_t __a, int32x4_t __b)
1401 {
1402 return __a & __b;
1403 }
1404
1405 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1406 vandq_s64 (int64x2_t __a, int64x2_t __b)
1407 {
1408 return __a & __b;
1409 }
1410
1411 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1412 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1413 {
1414 return __a & __b;
1415 }
1416
1417 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1418 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1419 {
1420 return __a & __b;
1421 }
1422
1423 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1424 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1425 {
1426 return __a & __b;
1427 }
1428
1429 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1430 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1431 {
1432 return __a & __b;
1433 }
1434
1435 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1436 vorr_s8 (int8x8_t __a, int8x8_t __b)
1437 {
1438 return __a | __b;
1439 }
1440
1441 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1442 vorr_s16 (int16x4_t __a, int16x4_t __b)
1443 {
1444 return __a | __b;
1445 }
1446
1447 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1448 vorr_s32 (int32x2_t __a, int32x2_t __b)
1449 {
1450 return __a | __b;
1451 }
1452
1453 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1454 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1455 {
1456 return __a | __b;
1457 }
1458
1459 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1460 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1461 {
1462 return __a | __b;
1463 }
1464
1465 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1466 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1467 {
1468 return __a | __b;
1469 }
1470
1471 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1472 vorr_s64 (int64x1_t __a, int64x1_t __b)
1473 {
1474 return __a | __b;
1475 }
1476
1477 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1478 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1479 {
1480 return __a | __b;
1481 }
1482
1483 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1484 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1485 {
1486 return __a | __b;
1487 }
1488
1489 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1490 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1491 {
1492 return __a | __b;
1493 }
1494
1495 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1496 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1497 {
1498 return __a | __b;
1499 }
1500
1501 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1502 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1503 {
1504 return __a | __b;
1505 }
1506
1507 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1508 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1509 {
1510 return __a | __b;
1511 }
1512
1513 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1514 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1515 {
1516 return __a | __b;
1517 }
1518
1519 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1520 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1521 {
1522 return __a | __b;
1523 }
1524
1525 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1526 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1527 {
1528 return __a | __b;
1529 }
1530
1531 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1532 veor_s8 (int8x8_t __a, int8x8_t __b)
1533 {
1534 return __a ^ __b;
1535 }
1536
1537 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1538 veor_s16 (int16x4_t __a, int16x4_t __b)
1539 {
1540 return __a ^ __b;
1541 }
1542
1543 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1544 veor_s32 (int32x2_t __a, int32x2_t __b)
1545 {
1546 return __a ^ __b;
1547 }
1548
1549 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1550 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1551 {
1552 return __a ^ __b;
1553 }
1554
1555 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1556 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1557 {
1558 return __a ^ __b;
1559 }
1560
1561 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1562 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1563 {
1564 return __a ^ __b;
1565 }
1566
1567 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1568 veor_s64 (int64x1_t __a, int64x1_t __b)
1569 {
1570 return __a ^ __b;
1571 }
1572
1573 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1574 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1575 {
1576 return __a ^ __b;
1577 }
1578
1579 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1580 veorq_s8 (int8x16_t __a, int8x16_t __b)
1581 {
1582 return __a ^ __b;
1583 }
1584
1585 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1586 veorq_s16 (int16x8_t __a, int16x8_t __b)
1587 {
1588 return __a ^ __b;
1589 }
1590
1591 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1592 veorq_s32 (int32x4_t __a, int32x4_t __b)
1593 {
1594 return __a ^ __b;
1595 }
1596
1597 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1598 veorq_s64 (int64x2_t __a, int64x2_t __b)
1599 {
1600 return __a ^ __b;
1601 }
1602
1603 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1604 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1605 {
1606 return __a ^ __b;
1607 }
1608
1609 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1610 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1611 {
1612 return __a ^ __b;
1613 }
1614
1615 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1616 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1617 {
1618 return __a ^ __b;
1619 }
1620
1621 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1622 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1623 {
1624 return __a ^ __b;
1625 }
1626
1627 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1628 vbic_s8 (int8x8_t __a, int8x8_t __b)
1629 {
1630 return __a & ~__b;
1631 }
1632
1633 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1634 vbic_s16 (int16x4_t __a, int16x4_t __b)
1635 {
1636 return __a & ~__b;
1637 }
1638
1639 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1640 vbic_s32 (int32x2_t __a, int32x2_t __b)
1641 {
1642 return __a & ~__b;
1643 }
1644
1645 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1646 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1647 {
1648 return __a & ~__b;
1649 }
1650
1651 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1652 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1653 {
1654 return __a & ~__b;
1655 }
1656
1657 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1658 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1659 {
1660 return __a & ~__b;
1661 }
1662
1663 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1664 vbic_s64 (int64x1_t __a, int64x1_t __b)
1665 {
1666 return __a & ~__b;
1667 }
1668
1669 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1670 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1671 {
1672 return __a & ~__b;
1673 }
1674
1675 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1676 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1677 {
1678 return __a & ~__b;
1679 }
1680
1681 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1682 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1683 {
1684 return __a & ~__b;
1685 }
1686
1687 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1688 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1689 {
1690 return __a & ~__b;
1691 }
1692
1693 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1694 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1695 {
1696 return __a & ~__b;
1697 }
1698
1699 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1700 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1701 {
1702 return __a & ~__b;
1703 }
1704
1705 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1706 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1707 {
1708 return __a & ~__b;
1709 }
1710
1711 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1712 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1713 {
1714 return __a & ~__b;
1715 }
1716
1717 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1718 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1719 {
1720 return __a & ~__b;
1721 }
1722
1723 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1724 vorn_s8 (int8x8_t __a, int8x8_t __b)
1725 {
1726 return __a | ~__b;
1727 }
1728
1729 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1730 vorn_s16 (int16x4_t __a, int16x4_t __b)
1731 {
1732 return __a | ~__b;
1733 }
1734
1735 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1736 vorn_s32 (int32x2_t __a, int32x2_t __b)
1737 {
1738 return __a | ~__b;
1739 }
1740
1741 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1742 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1743 {
1744 return __a | ~__b;
1745 }
1746
1747 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1748 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1749 {
1750 return __a | ~__b;
1751 }
1752
1753 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1754 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1755 {
1756 return __a | ~__b;
1757 }
1758
1759 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1760 vorn_s64 (int64x1_t __a, int64x1_t __b)
1761 {
1762 return __a | ~__b;
1763 }
1764
1765 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1766 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1767 {
1768 return __a | ~__b;
1769 }
1770
1771 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1772 vornq_s8 (int8x16_t __a, int8x16_t __b)
1773 {
1774 return __a | ~__b;
1775 }
1776
1777 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1778 vornq_s16 (int16x8_t __a, int16x8_t __b)
1779 {
1780 return __a | ~__b;
1781 }
1782
1783 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1784 vornq_s32 (int32x4_t __a, int32x4_t __b)
1785 {
1786 return __a | ~__b;
1787 }
1788
1789 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1790 vornq_s64 (int64x2_t __a, int64x2_t __b)
1791 {
1792 return __a | ~__b;
1793 }
1794
1795 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1796 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1797 {
1798 return __a | ~__b;
1799 }
1800
1801 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1802 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1803 {
1804 return __a | ~__b;
1805 }
1806
1807 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1808 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1809 {
1810 return __a | ~__b;
1811 }
1812
1813 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1814 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1815 {
1816 return __a | ~__b;
1817 }
1818
1819 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1820 vsub_s8 (int8x8_t __a, int8x8_t __b)
1821 {
1822 return __a - __b;
1823 }
1824
1825 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1826 vsub_s16 (int16x4_t __a, int16x4_t __b)
1827 {
1828 return __a - __b;
1829 }
1830
1831 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1832 vsub_s32 (int32x2_t __a, int32x2_t __b)
1833 {
1834 return __a - __b;
1835 }
1836
1837 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1838 vsub_f32 (float32x2_t __a, float32x2_t __b)
1839 {
1840 return __a - __b;
1841 }
1842
1843 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1844 vsub_f64 (float64x1_t __a, float64x1_t __b)
1845 {
1846 return __a - __b;
1847 }
1848
1849 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1850 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1851 {
1852 return __a - __b;
1853 }
1854
1855 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1856 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1857 {
1858 return __a - __b;
1859 }
1860
1861 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1862 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1863 {
1864 return __a - __b;
1865 }
1866
1867 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1868 vsub_s64 (int64x1_t __a, int64x1_t __b)
1869 {
1870 return __a - __b;
1871 }
1872
1873 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1874 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1875 {
1876 return __a - __b;
1877 }
1878
1879 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1880 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1881 {
1882 return __a - __b;
1883 }
1884
1885 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1886 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1887 {
1888 return __a - __b;
1889 }
1890
1891 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1892 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1893 {
1894 return __a - __b;
1895 }
1896
1897 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1898 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1899 {
1900 return __a - __b;
1901 }
1902
1903 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1904 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1905 {
1906 return __a - __b;
1907 }
1908
1909 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1910 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1911 {
1912 return __a - __b;
1913 }
1914
1915 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1916 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1917 {
1918 return __a - __b;
1919 }
1920
1921 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1922 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1923 {
1924 return __a - __b;
1925 }
1926
1927 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1928 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1929 {
1930 return __a - __b;
1931 }
1932
1933 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1934 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1935 {
1936 return __a - __b;
1937 }
1938
1939 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1940 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1941 {
1942 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1943 }
1944
1945 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1946 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1947 {
1948 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1949 }
1950
1951 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1952 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1953 {
1954 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1955 }
1956
1957 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1958 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1959 {
1960 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1961 (int8x8_t) __b);
1962 }
1963
1964 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1965 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1966 {
1967 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1968 (int16x4_t) __b);
1969 }
1970
1971 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1972 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1973 {
1974 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1975 (int32x2_t) __b);
1976 }
1977
1978 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1979 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1980 {
1981 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1982 }
1983
1984 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1985 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1986 {
1987 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1988 }
1989
1990 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1991 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1992 {
1993 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1994 }
1995
1996 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1997 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1998 {
1999 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
2000 (int8x16_t) __b);
2001 }
2002
2003 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2004 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
2005 {
2006 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2007 (int16x8_t) __b);
2008 }
2009
2010 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2011 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2012 {
2013 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2014 (int32x4_t) __b);
2015 }
2016
2017 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2018 vsubw_s8 (int16x8_t __a, int8x8_t __b)
2019 {
2020 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2021 }
2022
2023 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2024 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2025 {
2026 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2027 }
2028
2029 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2030 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2031 {
2032 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2033 }
2034
2035 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2036 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2037 {
2038 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2039 (int8x8_t) __b);
2040 }
2041
2042 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2043 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2044 {
2045 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2046 (int16x4_t) __b);
2047 }
2048
2049 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2050 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2051 {
2052 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2053 (int32x2_t) __b);
2054 }
2055
2056 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2057 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2058 {
2059 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2060 }
2061
2062 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2063 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2064 {
2065 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2066 }
2067
2068 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2069 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2070 {
2071 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2072 }
2073
2074 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2075 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2076 {
2077 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2078 (int8x16_t) __b);
2079 }
2080
2081 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2082 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2083 {
2084 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2085 (int16x8_t) __b);
2086 }
2087
2088 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2089 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2090 {
2091 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2092 (int32x4_t) __b);
2093 }
2094
2095 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2096 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2097 {
2098 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2099 }
2100
2101 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2102 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2103 {
2104 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2105 }
2106
2107 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2108 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2109 {
2110 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2111 }
2112
2113 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2114 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2115 {
2116 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
2117 }
2118
2119 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2120 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2121 {
2122 return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
2123 (int8x8_t) __b);
2124 }
2125
2126 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2127 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2128 {
2129 return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
2130 (int16x4_t) __b);
2131 }
2132
2133 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2134 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2135 {
2136 return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
2137 (int32x2_t) __b);
2138 }
2139
2140 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2141 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2142 {
2143 return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
2144 (int64x1_t) __b);
2145 }
2146
2147 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2148 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2149 {
2150 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2151 }
2152
2153 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2154 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2155 {
2156 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2157 }
2158
2159 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2160 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2161 {
2162 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2163 }
2164
2165 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2166 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2167 {
2168 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2169 }
2170
2171 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2172 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2173 {
2174 return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
2175 (int8x16_t) __b);
2176 }
2177
2178 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2179 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2180 {
2181 return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
2182 (int16x8_t) __b);
2183 }
2184
2185 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2186 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2187 {
2188 return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
2189 (int32x4_t) __b);
2190 }
2191
2192 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2193 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2194 {
2195 return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
2196 (int64x2_t) __b);
2197 }
2198
2199 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2200 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2201 {
2202 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2203 }
2204
2205 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2206 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2207 {
2208 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2209 }
2210
2211 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2212 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2213 {
2214 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2215 }
2216
2217 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2218 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2219 {
2220 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
2221 }
2222
2223 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2224 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2225 {
2226 return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
2227 (int8x8_t) __b);
2228 }
2229
2230 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2231 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2232 {
2233 return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
2234 (int16x4_t) __b);
2235 }
2236
2237 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2238 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2239 {
2240 return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
2241 (int32x2_t) __b);
2242 }
2243
2244 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2245 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2246 {
2247 return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
2248 (int64x1_t) __b);
2249 }
2250
2251 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2252 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2253 {
2254 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2255 }
2256
2257 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2258 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2259 {
2260 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2261 }
2262
2263 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2264 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2265 {
2266 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2267 }
2268
2269 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2270 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2271 {
2272 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2273 }
2274
2275 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2276 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2277 {
2278 return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
2279 (int8x16_t) __b);
2280 }
2281
2282 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2283 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2284 {
2285 return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
2286 (int16x8_t) __b);
2287 }
2288
2289 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2290 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2291 {
2292 return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
2293 (int32x4_t) __b);
2294 }
2295
2296 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2297 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2298 {
2299 return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
2300 (int64x2_t) __b);
2301 }
2302
2303 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2304 vqneg_s8 (int8x8_t __a)
2305 {
2306 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2307 }
2308
2309 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2310 vqneg_s16 (int16x4_t __a)
2311 {
2312 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2313 }
2314
2315 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2316 vqneg_s32 (int32x2_t __a)
2317 {
2318 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2319 }
2320
2321 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2322 vqnegq_s8 (int8x16_t __a)
2323 {
2324 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2325 }
2326
2327 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2328 vqnegq_s16 (int16x8_t __a)
2329 {
2330 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2331 }
2332
2333 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2334 vqnegq_s32 (int32x4_t __a)
2335 {
2336 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2337 }
2338
2339 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2340 vqabs_s8 (int8x8_t __a)
2341 {
2342 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2343 }
2344
2345 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2346 vqabs_s16 (int16x4_t __a)
2347 {
2348 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2349 }
2350
2351 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2352 vqabs_s32 (int32x2_t __a)
2353 {
2354 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2355 }
2356
2357 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2358 vqabsq_s8 (int8x16_t __a)
2359 {
2360 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2361 }
2362
2363 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2364 vqabsq_s16 (int16x8_t __a)
2365 {
2366 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2367 }
2368
2369 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2370 vqabsq_s32 (int32x4_t __a)
2371 {
2372 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2373 }
2374
2375 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2376 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2377 {
2378 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2379 }
2380
2381 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2382 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2383 {
2384 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2385 }
2386
2387 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2388 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2389 {
2390 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2391 }
2392
2393 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2394 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2395 {
2396 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2397 }
2398
2399 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2400 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2401 {
2402 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2403 }
2404
2405 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2406 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2407 {
2408 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2409 }
2410
2411 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2412 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2413 {
2414 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2415 }
2416
2417 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2418 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2419 {
2420 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2421 }
2422
2423 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2424 vcreate_s8 (uint64_t __a)
2425 {
2426 return (int8x8_t) __a;
2427 }
2428
2429 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2430 vcreate_s16 (uint64_t __a)
2431 {
2432 return (int16x4_t) __a;
2433 }
2434
2435 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2436 vcreate_s32 (uint64_t __a)
2437 {
2438 return (int32x2_t) __a;
2439 }
2440
2441 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2442 vcreate_s64 (uint64_t __a)
2443 {
2444 return (int64x1_t) __a;
2445 }
2446
2447 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2448 vcreate_f32 (uint64_t __a)
2449 {
2450 return (float32x2_t) __a;
2451 }
2452
2453 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2454 vcreate_u8 (uint64_t __a)
2455 {
2456 return (uint8x8_t) __a;
2457 }
2458
2459 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2460 vcreate_u16 (uint64_t __a)
2461 {
2462 return (uint16x4_t) __a;
2463 }
2464
2465 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2466 vcreate_u32 (uint64_t __a)
2467 {
2468 return (uint32x2_t) __a;
2469 }
2470
2471 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2472 vcreate_u64 (uint64_t __a)
2473 {
2474 return (uint64x1_t) __a;
2475 }
2476
2477 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2478 vcreate_f64 (uint64_t __a)
2479 {
2480 return (float64x1_t) __builtin_aarch64_createdf (__a);
2481 }
2482
2483 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2484 vcreate_p8 (uint64_t __a)
2485 {
2486 return (poly8x8_t) __a;
2487 }
2488
2489 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2490 vcreate_p16 (uint64_t __a)
2491 {
2492 return (poly16x4_t) __a;
2493 }
2494
2495 /* vget_lane */
2496
2497 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2498 vget_lane_f32 (float32x2_t __a, const int __b)
2499 {
2500 return __aarch64_vget_lane_f32 (__a, __b);
2501 }
2502
2503 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2504 vget_lane_f64 (float64x1_t __a, const int __b)
2505 {
2506 return __aarch64_vget_lane_f64 (__a, __b);
2507 }
2508
2509 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2510 vget_lane_p8 (poly8x8_t __a, const int __b)
2511 {
2512 return __aarch64_vget_lane_p8 (__a, __b);
2513 }
2514
2515 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2516 vget_lane_p16 (poly16x4_t __a, const int __b)
2517 {
2518 return __aarch64_vget_lane_p16 (__a, __b);
2519 }
2520
2521 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2522 vget_lane_s8 (int8x8_t __a, const int __b)
2523 {
2524 return __aarch64_vget_lane_s8 (__a, __b);
2525 }
2526
2527 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2528 vget_lane_s16 (int16x4_t __a, const int __b)
2529 {
2530 return __aarch64_vget_lane_s16 (__a, __b);
2531 }
2532
2533 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2534 vget_lane_s32 (int32x2_t __a, const int __b)
2535 {
2536 return __aarch64_vget_lane_s32 (__a, __b);
2537 }
2538
2539 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2540 vget_lane_s64 (int64x1_t __a, const int __b)
2541 {
2542 return __aarch64_vget_lane_s64 (__a, __b);
2543 }
2544
2545 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2546 vget_lane_u8 (uint8x8_t __a, const int __b)
2547 {
2548 return __aarch64_vget_lane_u8 (__a, __b);
2549 }
2550
2551 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2552 vget_lane_u16 (uint16x4_t __a, const int __b)
2553 {
2554 return __aarch64_vget_lane_u16 (__a, __b);
2555 }
2556
2557 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2558 vget_lane_u32 (uint32x2_t __a, const int __b)
2559 {
2560 return __aarch64_vget_lane_u32 (__a, __b);
2561 }
2562
2563 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2564 vget_lane_u64 (uint64x1_t __a, const int __b)
2565 {
2566 return __aarch64_vget_lane_u64 (__a, __b);
2567 }
2568
2569 /* vgetq_lane */
2570
2571 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2572 vgetq_lane_f32 (float32x4_t __a, const int __b)
2573 {
2574 return __aarch64_vgetq_lane_f32 (__a, __b);
2575 }
2576
2577 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2578 vgetq_lane_f64 (float64x2_t __a, const int __b)
2579 {
2580 return __aarch64_vgetq_lane_f64 (__a, __b);
2581 }
2582
2583 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2584 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2585 {
2586 return __aarch64_vgetq_lane_p8 (__a, __b);
2587 }
2588
2589 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2590 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2591 {
2592 return __aarch64_vgetq_lane_p16 (__a, __b);
2593 }
2594
2595 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2596 vgetq_lane_s8 (int8x16_t __a, const int __b)
2597 {
2598 return __aarch64_vgetq_lane_s8 (__a, __b);
2599 }
2600
2601 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2602 vgetq_lane_s16 (int16x8_t __a, const int __b)
2603 {
2604 return __aarch64_vgetq_lane_s16 (__a, __b);
2605 }
2606
2607 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2608 vgetq_lane_s32 (int32x4_t __a, const int __b)
2609 {
2610 return __aarch64_vgetq_lane_s32 (__a, __b);
2611 }
2612
2613 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2614 vgetq_lane_s64 (int64x2_t __a, const int __b)
2615 {
2616 return __aarch64_vgetq_lane_s64 (__a, __b);
2617 }
2618
2619 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2620 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2621 {
2622 return __aarch64_vgetq_lane_u8 (__a, __b);
2623 }
2624
2625 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2626 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2627 {
2628 return __aarch64_vgetq_lane_u16 (__a, __b);
2629 }
2630
2631 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2632 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2633 {
2634 return __aarch64_vgetq_lane_u32 (__a, __b);
2635 }
2636
2637 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2638 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2639 {
2640 return __aarch64_vgetq_lane_u64 (__a, __b);
2641 }
2642
2643 /* vreinterpret */
2644
2645 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2646 vreinterpret_p8_s8 (int8x8_t __a)
2647 {
2648 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
2649 }
2650
2651 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2652 vreinterpret_p8_s16 (int16x4_t __a)
2653 {
2654 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
2655 }
2656
2657 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2658 vreinterpret_p8_s32 (int32x2_t __a)
2659 {
2660 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
2661 }
2662
2663 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2664 vreinterpret_p8_s64 (int64x1_t __a)
2665 {
2666 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
2667 }
2668
2669 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2670 vreinterpret_p8_f32 (float32x2_t __a)
2671 {
2672 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
2673 }
2674
2675 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2676 vreinterpret_p8_u8 (uint8x8_t __a)
2677 {
2678 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
2679 }
2680
2681 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2682 vreinterpret_p8_u16 (uint16x4_t __a)
2683 {
2684 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2685 }
2686
2687 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2688 vreinterpret_p8_u32 (uint32x2_t __a)
2689 {
2690 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
2691 }
2692
2693 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2694 vreinterpret_p8_u64 (uint64x1_t __a)
2695 {
2696 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
2697 }
2698
2699 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2700 vreinterpret_p8_p16 (poly16x4_t __a)
2701 {
2702 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2703 }
2704
2705 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2706 vreinterpretq_p8_s8 (int8x16_t __a)
2707 {
2708 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
2709 }
2710
2711 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2712 vreinterpretq_p8_s16 (int16x8_t __a)
2713 {
2714 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
2715 }
2716
2717 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2718 vreinterpretq_p8_s32 (int32x4_t __a)
2719 {
2720 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
2721 }
2722
2723 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2724 vreinterpretq_p8_s64 (int64x2_t __a)
2725 {
2726 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
2727 }
2728
2729 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2730 vreinterpretq_p8_f32 (float32x4_t __a)
2731 {
2732 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
2733 }
2734
2735 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2736 vreinterpretq_p8_u8 (uint8x16_t __a)
2737 {
2738 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
2739 __a);
2740 }
2741
2742 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2743 vreinterpretq_p8_u16 (uint16x8_t __a)
2744 {
2745 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2746 __a);
2747 }
2748
2749 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2750 vreinterpretq_p8_u32 (uint32x4_t __a)
2751 {
2752 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
2753 __a);
2754 }
2755
2756 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2757 vreinterpretq_p8_u64 (uint64x2_t __a)
2758 {
2759 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
2760 __a);
2761 }
2762
2763 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2764 vreinterpretq_p8_p16 (poly16x8_t __a)
2765 {
2766 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2767 __a);
2768 }
2769
2770 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2771 vreinterpret_p16_s8 (int8x8_t __a)
2772 {
2773 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
2774 }
2775
2776 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2777 vreinterpret_p16_s16 (int16x4_t __a)
2778 {
2779 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
2780 }
2781
2782 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2783 vreinterpret_p16_s32 (int32x2_t __a)
2784 {
2785 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
2786 }
2787
2788 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2789 vreinterpret_p16_s64 (int64x1_t __a)
2790 {
2791 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
2792 }
2793
2794 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2795 vreinterpret_p16_f32 (float32x2_t __a)
2796 {
2797 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
2798 }
2799
2800 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2801 vreinterpret_p16_u8 (uint8x8_t __a)
2802 {
2803 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2804 }
2805
2806 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2807 vreinterpret_p16_u16 (uint16x4_t __a)
2808 {
2809 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
2810 }
2811
2812 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2813 vreinterpret_p16_u32 (uint32x2_t __a)
2814 {
2815 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
2816 }
2817
2818 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2819 vreinterpret_p16_u64 (uint64x1_t __a)
2820 {
2821 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
2822 }
2823
2824 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2825 vreinterpret_p16_p8 (poly8x8_t __a)
2826 {
2827 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2828 }
2829
2830 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2831 vreinterpretq_p16_s8 (int8x16_t __a)
2832 {
2833 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
2834 }
2835
2836 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2837 vreinterpretq_p16_s16 (int16x8_t __a)
2838 {
2839 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
2840 }
2841
2842 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2843 vreinterpretq_p16_s32 (int32x4_t __a)
2844 {
2845 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
2846 }
2847
2848 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2849 vreinterpretq_p16_s64 (int64x2_t __a)
2850 {
2851 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
2852 }
2853
2854 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2855 vreinterpretq_p16_f32 (float32x4_t __a)
2856 {
2857 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
2858 }
2859
2860 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2861 vreinterpretq_p16_u8 (uint8x16_t __a)
2862 {
2863 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2864 __a);
2865 }
2866
2867 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2868 vreinterpretq_p16_u16 (uint16x8_t __a)
2869 {
2870 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
2871 }
2872
2873 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2874 vreinterpretq_p16_u32 (uint32x4_t __a)
2875 {
2876 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
2877 }
2878
2879 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2880 vreinterpretq_p16_u64 (uint64x2_t __a)
2881 {
2882 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
2883 }
2884
2885 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2886 vreinterpretq_p16_p8 (poly8x16_t __a)
2887 {
2888 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2889 __a);
2890 }
2891
2892 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2893 vreinterpret_f32_s8 (int8x8_t __a)
2894 {
2895 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
2896 }
2897
2898 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2899 vreinterpret_f32_s16 (int16x4_t __a)
2900 {
2901 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
2902 }
2903
2904 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2905 vreinterpret_f32_s32 (int32x2_t __a)
2906 {
2907 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
2908 }
2909
2910 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2911 vreinterpret_f32_s64 (int64x1_t __a)
2912 {
2913 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
2914 }
2915
2916 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2917 vreinterpret_f32_u8 (uint8x8_t __a)
2918 {
2919 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2920 }
2921
2922 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2923 vreinterpret_f32_u16 (uint16x4_t __a)
2924 {
2925 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2926 __a);
2927 }
2928
2929 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2930 vreinterpret_f32_u32 (uint32x2_t __a)
2931 {
2932 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
2933 __a);
2934 }
2935
2936 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2937 vreinterpret_f32_u64 (uint64x1_t __a)
2938 {
2939 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
2940 }
2941
2942 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2943 vreinterpret_f32_p8 (poly8x8_t __a)
2944 {
2945 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2946 }
2947
2948 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2949 vreinterpret_f32_p16 (poly16x4_t __a)
2950 {
2951 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2952 __a);
2953 }
2954
2955 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2956 vreinterpretq_f32_s8 (int8x16_t __a)
2957 {
2958 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
2959 }
2960
2961 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2962 vreinterpretq_f32_s16 (int16x8_t __a)
2963 {
2964 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
2965 }
2966
2967 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2968 vreinterpretq_f32_s32 (int32x4_t __a)
2969 {
2970 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
2971 }
2972
2973 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2974 vreinterpretq_f32_s64 (int64x2_t __a)
2975 {
2976 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
2977 }
2978
2979 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2980 vreinterpretq_f32_u8 (uint8x16_t __a)
2981 {
2982 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2983 __a);
2984 }
2985
2986 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2987 vreinterpretq_f32_u16 (uint16x8_t __a)
2988 {
2989 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2990 __a);
2991 }
2992
2993 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2994 vreinterpretq_f32_u32 (uint32x4_t __a)
2995 {
2996 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
2997 __a);
2998 }
2999
3000 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3001 vreinterpretq_f32_u64 (uint64x2_t __a)
3002 {
3003 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
3004 __a);
3005 }
3006
3007 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3008 vreinterpretq_f32_p8 (poly8x16_t __a)
3009 {
3010 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
3011 __a);
3012 }
3013
3014 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3015 vreinterpretq_f32_p16 (poly16x8_t __a)
3016 {
3017 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
3018 __a);
3019 }
3020
3021 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3022 vreinterpret_s64_s8 (int8x8_t __a)
3023 {
3024 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3025 }
3026
3027 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3028 vreinterpret_s64_s16 (int16x4_t __a)
3029 {
3030 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3031 }
3032
3033 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3034 vreinterpret_s64_s32 (int32x2_t __a)
3035 {
3036 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3037 }
3038
3039 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3040 vreinterpret_s64_f32 (float32x2_t __a)
3041 {
3042 return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3043 }
3044
3045 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3046 vreinterpret_s64_u8 (uint8x8_t __a)
3047 {
3048 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3049 }
3050
3051 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3052 vreinterpret_s64_u16 (uint16x4_t __a)
3053 {
3054 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3055 }
3056
3057 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3058 vreinterpret_s64_u32 (uint32x2_t __a)
3059 {
3060 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3061 }
3062
3063 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3064 vreinterpret_s64_u64 (uint64x1_t __a)
3065 {
3066 return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
3067 }
3068
3069 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3070 vreinterpret_s64_p8 (poly8x8_t __a)
3071 {
3072 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3073 }
3074
3075 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3076 vreinterpret_s64_p16 (poly16x4_t __a)
3077 {
3078 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3079 }
3080
3081 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3082 vreinterpretq_s64_s8 (int8x16_t __a)
3083 {
3084 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3085 }
3086
3087 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3088 vreinterpretq_s64_s16 (int16x8_t __a)
3089 {
3090 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3091 }
3092
3093 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3094 vreinterpretq_s64_s32 (int32x4_t __a)
3095 {
3096 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3097 }
3098
3099 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3100 vreinterpretq_s64_f32 (float32x4_t __a)
3101 {
3102 return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3103 }
3104
3105 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3106 vreinterpretq_s64_u8 (uint8x16_t __a)
3107 {
3108 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3109 }
3110
3111 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3112 vreinterpretq_s64_u16 (uint16x8_t __a)
3113 {
3114 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3115 }
3116
3117 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3118 vreinterpretq_s64_u32 (uint32x4_t __a)
3119 {
3120 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3121 }
3122
3123 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3124 vreinterpretq_s64_u64 (uint64x2_t __a)
3125 {
3126 return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
3127 }
3128
3129 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3130 vreinterpretq_s64_p8 (poly8x16_t __a)
3131 {
3132 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3133 }
3134
3135 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3136 vreinterpretq_s64_p16 (poly16x8_t __a)
3137 {
3138 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3139 }
3140
3141 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3142 vreinterpret_u64_s8 (int8x8_t __a)
3143 {
3144 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3145 }
3146
3147 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3148 vreinterpret_u64_s16 (int16x4_t __a)
3149 {
3150 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3151 }
3152
3153 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3154 vreinterpret_u64_s32 (int32x2_t __a)
3155 {
3156 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3157 }
3158
3159 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3160 vreinterpret_u64_s64 (int64x1_t __a)
3161 {
3162 return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
3163 }
3164
3165 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3166 vreinterpret_u64_f32 (float32x2_t __a)
3167 {
3168 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3169 }
3170
3171 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3172 vreinterpret_u64_u8 (uint8x8_t __a)
3173 {
3174 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3175 }
3176
3177 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3178 vreinterpret_u64_u16 (uint16x4_t __a)
3179 {
3180 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3181 }
3182
3183 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3184 vreinterpret_u64_u32 (uint32x2_t __a)
3185 {
3186 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3187 }
3188
3189 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3190 vreinterpret_u64_p8 (poly8x8_t __a)
3191 {
3192 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3193 }
3194
3195 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3196 vreinterpret_u64_p16 (poly16x4_t __a)
3197 {
3198 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3199 }
3200
3201 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3202 vreinterpretq_u64_s8 (int8x16_t __a)
3203 {
3204 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3205 }
3206
3207 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3208 vreinterpretq_u64_s16 (int16x8_t __a)
3209 {
3210 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3211 }
3212
3213 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3214 vreinterpretq_u64_s32 (int32x4_t __a)
3215 {
3216 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3217 }
3218
3219 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3220 vreinterpretq_u64_s64 (int64x2_t __a)
3221 {
3222 return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
3223 }
3224
3225 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3226 vreinterpretq_u64_f32 (float32x4_t __a)
3227 {
3228 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3229 }
3230
3231 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3232 vreinterpretq_u64_u8 (uint8x16_t __a)
3233 {
3234 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3235 __a);
3236 }
3237
3238 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3239 vreinterpretq_u64_u16 (uint16x8_t __a)
3240 {
3241 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3242 }
3243
3244 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3245 vreinterpretq_u64_u32 (uint32x4_t __a)
3246 {
3247 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3248 }
3249
3250 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3251 vreinterpretq_u64_p8 (poly8x16_t __a)
3252 {
3253 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3254 __a);
3255 }
3256
3257 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3258 vreinterpretq_u64_p16 (poly16x8_t __a)
3259 {
3260 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3261 }
3262
3263 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3264 vreinterpret_s8_s16 (int16x4_t __a)
3265 {
3266 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3267 }
3268
3269 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3270 vreinterpret_s8_s32 (int32x2_t __a)
3271 {
3272 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3273 }
3274
3275 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3276 vreinterpret_s8_s64 (int64x1_t __a)
3277 {
3278 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3279 }
3280
3281 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3282 vreinterpret_s8_f32 (float32x2_t __a)
3283 {
3284 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3285 }
3286
3287 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3288 vreinterpret_s8_u8 (uint8x8_t __a)
3289 {
3290 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3291 }
3292
3293 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3294 vreinterpret_s8_u16 (uint16x4_t __a)
3295 {
3296 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3297 }
3298
3299 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3300 vreinterpret_s8_u32 (uint32x2_t __a)
3301 {
3302 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3303 }
3304
3305 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3306 vreinterpret_s8_u64 (uint64x1_t __a)
3307 {
3308 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3309 }
3310
3311 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3312 vreinterpret_s8_p8 (poly8x8_t __a)
3313 {
3314 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3315 }
3316
3317 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3318 vreinterpret_s8_p16 (poly16x4_t __a)
3319 {
3320 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3321 }
3322
3323 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3324 vreinterpretq_s8_s16 (int16x8_t __a)
3325 {
3326 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3327 }
3328
3329 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3330 vreinterpretq_s8_s32 (int32x4_t __a)
3331 {
3332 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3333 }
3334
3335 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3336 vreinterpretq_s8_s64 (int64x2_t __a)
3337 {
3338 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3339 }
3340
3341 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3342 vreinterpretq_s8_f32 (float32x4_t __a)
3343 {
3344 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3345 }
3346
3347 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3348 vreinterpretq_s8_u8 (uint8x16_t __a)
3349 {
3350 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3351 __a);
3352 }
3353
3354 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3355 vreinterpretq_s8_u16 (uint16x8_t __a)
3356 {
3357 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3358 }
3359
3360 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3361 vreinterpretq_s8_u32 (uint32x4_t __a)
3362 {
3363 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
3364 }
3365
3366 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3367 vreinterpretq_s8_u64 (uint64x2_t __a)
3368 {
3369 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
3370 }
3371
3372 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3373 vreinterpretq_s8_p8 (poly8x16_t __a)
3374 {
3375 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3376 __a);
3377 }
3378
3379 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3380 vreinterpretq_s8_p16 (poly16x8_t __a)
3381 {
3382 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3383 }
3384
3385 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3386 vreinterpret_s16_s8 (int8x8_t __a)
3387 {
3388 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3389 }
3390
3391 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3392 vreinterpret_s16_s32 (int32x2_t __a)
3393 {
3394 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3395 }
3396
3397 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3398 vreinterpret_s16_s64 (int64x1_t __a)
3399 {
3400 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3401 }
3402
3403 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3404 vreinterpret_s16_f32 (float32x2_t __a)
3405 {
3406 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3407 }
3408
3409 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3410 vreinterpret_s16_u8 (uint8x8_t __a)
3411 {
3412 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3413 }
3414
3415 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3416 vreinterpret_s16_u16 (uint16x4_t __a)
3417 {
3418 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3419 }
3420
3421 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3422 vreinterpret_s16_u32 (uint32x2_t __a)
3423 {
3424 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3425 }
3426
3427 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3428 vreinterpret_s16_u64 (uint64x1_t __a)
3429 {
3430 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3431 }
3432
3433 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3434 vreinterpret_s16_p8 (poly8x8_t __a)
3435 {
3436 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3437 }
3438
3439 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3440 vreinterpret_s16_p16 (poly16x4_t __a)
3441 {
3442 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3443 }
3444
3445 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3446 vreinterpretq_s16_s8 (int8x16_t __a)
3447 {
3448 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3449 }
3450
3451 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3452 vreinterpretq_s16_s32 (int32x4_t __a)
3453 {
3454 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3455 }
3456
3457 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3458 vreinterpretq_s16_s64 (int64x2_t __a)
3459 {
3460 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3461 }
3462
3463 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3464 vreinterpretq_s16_f32 (float32x4_t __a)
3465 {
3466 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3467 }
3468
3469 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3470 vreinterpretq_s16_u8 (uint8x16_t __a)
3471 {
3472 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3473 }
3474
3475 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3476 vreinterpretq_s16_u16 (uint16x8_t __a)
3477 {
3478 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3479 }
3480
3481 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3482 vreinterpretq_s16_u32 (uint32x4_t __a)
3483 {
3484 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3485 }
3486
3487 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3488 vreinterpretq_s16_u64 (uint64x2_t __a)
3489 {
3490 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3491 }
3492
3493 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3494 vreinterpretq_s16_p8 (poly8x16_t __a)
3495 {
3496 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3497 }
3498
3499 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3500 vreinterpretq_s16_p16 (poly16x8_t __a)
3501 {
3502 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3503 }
3504
3505 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3506 vreinterpret_s32_s8 (int8x8_t __a)
3507 {
3508 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3509 }
3510
3511 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3512 vreinterpret_s32_s16 (int16x4_t __a)
3513 {
3514 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3515 }
3516
3517 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3518 vreinterpret_s32_s64 (int64x1_t __a)
3519 {
3520 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3521 }
3522
3523 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3524 vreinterpret_s32_f32 (float32x2_t __a)
3525 {
3526 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3527 }
3528
3529 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3530 vreinterpret_s32_u8 (uint8x8_t __a)
3531 {
3532 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3533 }
3534
3535 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3536 vreinterpret_s32_u16 (uint16x4_t __a)
3537 {
3538 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3539 }
3540
3541 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3542 vreinterpret_s32_u32 (uint32x2_t __a)
3543 {
3544 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
3545 }
3546
3547 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3548 vreinterpret_s32_u64 (uint64x1_t __a)
3549 {
3550 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3551 }
3552
3553 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3554 vreinterpret_s32_p8 (poly8x8_t __a)
3555 {
3556 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3557 }
3558
3559 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3560 vreinterpret_s32_p16 (poly16x4_t __a)
3561 {
3562 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3563 }
3564
3565 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3566 vreinterpretq_s32_s8 (int8x16_t __a)
3567 {
3568 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3569 }
3570
3571 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3572 vreinterpretq_s32_s16 (int16x8_t __a)
3573 {
3574 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3575 }
3576
3577 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3578 vreinterpretq_s32_s64 (int64x2_t __a)
3579 {
3580 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3581 }
3582
3583 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3584 vreinterpretq_s32_f32 (float32x4_t __a)
3585 {
3586 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3587 }
3588
3589 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3590 vreinterpretq_s32_u8 (uint8x16_t __a)
3591 {
3592 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3593 }
3594
3595 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3596 vreinterpretq_s32_u16 (uint16x8_t __a)
3597 {
3598 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3599 }
3600
3601 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3602 vreinterpretq_s32_u32 (uint32x4_t __a)
3603 {
3604 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
3605 }
3606
3607 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3608 vreinterpretq_s32_u64 (uint64x2_t __a)
3609 {
3610 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3611 }
3612
3613 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3614 vreinterpretq_s32_p8 (poly8x16_t __a)
3615 {
3616 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3617 }
3618
3619 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3620 vreinterpretq_s32_p16 (poly16x8_t __a)
3621 {
3622 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3623 }
3624
3625 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3626 vreinterpret_u8_s8 (int8x8_t __a)
3627 {
3628 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
3629 }
3630
3631 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3632 vreinterpret_u8_s16 (int16x4_t __a)
3633 {
3634 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3635 }
3636
3637 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3638 vreinterpret_u8_s32 (int32x2_t __a)
3639 {
3640 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3641 }
3642
3643 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3644 vreinterpret_u8_s64 (int64x1_t __a)
3645 {
3646 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3647 }
3648
3649 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3650 vreinterpret_u8_f32 (float32x2_t __a)
3651 {
3652 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3653 }
3654
3655 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3656 vreinterpret_u8_u16 (uint16x4_t __a)
3657 {
3658 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3659 }
3660
3661 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3662 vreinterpret_u8_u32 (uint32x2_t __a)
3663 {
3664 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3665 }
3666
3667 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3668 vreinterpret_u8_u64 (uint64x1_t __a)
3669 {
3670 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3671 }
3672
3673 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3674 vreinterpret_u8_p8 (poly8x8_t __a)
3675 {
3676 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3677 }
3678
3679 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3680 vreinterpret_u8_p16 (poly16x4_t __a)
3681 {
3682 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3683 }
3684
3685 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3686 vreinterpretq_u8_s8 (int8x16_t __a)
3687 {
3688 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
3689 }
3690
3691 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3692 vreinterpretq_u8_s16 (int16x8_t __a)
3693 {
3694 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3695 }
3696
3697 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3698 vreinterpretq_u8_s32 (int32x4_t __a)
3699 {
3700 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3701 }
3702
3703 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3704 vreinterpretq_u8_s64 (int64x2_t __a)
3705 {
3706 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3707 }
3708
3709 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3710 vreinterpretq_u8_f32 (float32x4_t __a)
3711 {
3712 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3713 }
3714
3715 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3716 vreinterpretq_u8_u16 (uint16x8_t __a)
3717 {
3718 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3719 __a);
3720 }
3721
3722 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3723 vreinterpretq_u8_u32 (uint32x4_t __a)
3724 {
3725 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
3726 __a);
3727 }
3728
3729 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3730 vreinterpretq_u8_u64 (uint64x2_t __a)
3731 {
3732 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
3733 __a);
3734 }
3735
3736 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3737 vreinterpretq_u8_p8 (poly8x16_t __a)
3738 {
3739 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3740 __a);
3741 }
3742
3743 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3744 vreinterpretq_u8_p16 (poly16x8_t __a)
3745 {
3746 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3747 __a);
3748 }
3749
3750 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3751 vreinterpret_u16_s8 (int8x8_t __a)
3752 {
3753 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3754 }
3755
3756 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3757 vreinterpret_u16_s16 (int16x4_t __a)
3758 {
3759 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
3760 }
3761
3762 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3763 vreinterpret_u16_s32 (int32x2_t __a)
3764 {
3765 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3766 }
3767
3768 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3769 vreinterpret_u16_s64 (int64x1_t __a)
3770 {
3771 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3772 }
3773
3774 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3775 vreinterpret_u16_f32 (float32x2_t __a)
3776 {
3777 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3778 }
3779
3780 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3781 vreinterpret_u16_u8 (uint8x8_t __a)
3782 {
3783 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3784 }
3785
3786 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3787 vreinterpret_u16_u32 (uint32x2_t __a)
3788 {
3789 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3790 }
3791
3792 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3793 vreinterpret_u16_u64 (uint64x1_t __a)
3794 {
3795 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3796 }
3797
3798 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3799 vreinterpret_u16_p8 (poly8x8_t __a)
3800 {
3801 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3802 }
3803
3804 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3805 vreinterpret_u16_p16 (poly16x4_t __a)
3806 {
3807 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3808 }
3809
3810 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3811 vreinterpretq_u16_s8 (int8x16_t __a)
3812 {
3813 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3814 }
3815
3816 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3817 vreinterpretq_u16_s16 (int16x8_t __a)
3818 {
3819 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
3820 }
3821
3822 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3823 vreinterpretq_u16_s32 (int32x4_t __a)
3824 {
3825 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3826 }
3827
3828 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3829 vreinterpretq_u16_s64 (int64x2_t __a)
3830 {
3831 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3832 }
3833
3834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3835 vreinterpretq_u16_f32 (float32x4_t __a)
3836 {
3837 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3838 }
3839
3840 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3841 vreinterpretq_u16_u8 (uint8x16_t __a)
3842 {
3843 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3844 __a);
3845 }
3846
3847 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3848 vreinterpretq_u16_u32 (uint32x4_t __a)
3849 {
3850 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3851 }
3852
3853 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3854 vreinterpretq_u16_u64 (uint64x2_t __a)
3855 {
3856 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3857 }
3858
3859 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3860 vreinterpretq_u16_p8 (poly8x16_t __a)
3861 {
3862 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3863 __a);
3864 }
3865
3866 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3867 vreinterpretq_u16_p16 (poly16x8_t __a)
3868 {
3869 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3870 }
3871
3872 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3873 vreinterpret_u32_s8 (int8x8_t __a)
3874 {
3875 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3876 }
3877
3878 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3879 vreinterpret_u32_s16 (int16x4_t __a)
3880 {
3881 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3882 }
3883
3884 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3885 vreinterpret_u32_s32 (int32x2_t __a)
3886 {
3887 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
3888 }
3889
3890 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3891 vreinterpret_u32_s64 (int64x1_t __a)
3892 {
3893 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3894 }
3895
3896 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3897 vreinterpret_u32_f32 (float32x2_t __a)
3898 {
3899 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3900 }
3901
3902 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3903 vreinterpret_u32_u8 (uint8x8_t __a)
3904 {
3905 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3906 }
3907
3908 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3909 vreinterpret_u32_u16 (uint16x4_t __a)
3910 {
3911 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3912 }
3913
3914 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3915 vreinterpret_u32_u64 (uint64x1_t __a)
3916 {
3917 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3918 }
3919
3920 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3921 vreinterpret_u32_p8 (poly8x8_t __a)
3922 {
3923 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3924 }
3925
3926 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3927 vreinterpret_u32_p16 (poly16x4_t __a)
3928 {
3929 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3930 }
3931
3932 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3933 vreinterpretq_u32_s8 (int8x16_t __a)
3934 {
3935 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3936 }
3937
3938 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3939 vreinterpretq_u32_s16 (int16x8_t __a)
3940 {
3941 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3942 }
3943
3944 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3945 vreinterpretq_u32_s32 (int32x4_t __a)
3946 {
3947 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
3948 }
3949
3950 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3951 vreinterpretq_u32_s64 (int64x2_t __a)
3952 {
3953 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3954 }
3955
3956 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3957 vreinterpretq_u32_f32 (float32x4_t __a)
3958 {
3959 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3960 }
3961
3962 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3963 vreinterpretq_u32_u8 (uint8x16_t __a)
3964 {
3965 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3966 __a);
3967 }
3968
3969 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3970 vreinterpretq_u32_u16 (uint16x8_t __a)
3971 {
3972 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3973 }
3974
3975 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3976 vreinterpretq_u32_u64 (uint64x2_t __a)
3977 {
3978 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3979 }
3980
3981 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3982 vreinterpretq_u32_p8 (poly8x16_t __a)
3983 {
3984 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3985 __a);
3986 }
3987
3988 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3989 vreinterpretq_u32_p16 (poly16x8_t __a)
3990 {
3991 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3992 }
3993
3994 #define __GET_LOW(__TYPE) \
3995 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
3996 uint64_t lo = vgetq_lane_u64 (tmp, 0); \
3997 return vreinterpret_##__TYPE##_u64 (lo);
3998
3999 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4000 vget_low_f32 (float32x4_t __a)
4001 {
4002 __GET_LOW (f32);
4003 }
4004
4005 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4006 vget_low_f64 (float64x2_t __a)
4007 {
4008 return vgetq_lane_f64 (__a, 0);
4009 }
4010
4011 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4012 vget_low_p8 (poly8x16_t __a)
4013 {
4014 __GET_LOW (p8);
4015 }
4016
4017 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4018 vget_low_p16 (poly16x8_t __a)
4019 {
4020 __GET_LOW (p16);
4021 }
4022
4023 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4024 vget_low_s8 (int8x16_t __a)
4025 {
4026 __GET_LOW (s8);
4027 }
4028
4029 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4030 vget_low_s16 (int16x8_t __a)
4031 {
4032 __GET_LOW (s16);
4033 }
4034
4035 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4036 vget_low_s32 (int32x4_t __a)
4037 {
4038 __GET_LOW (s32);
4039 }
4040
4041 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4042 vget_low_s64 (int64x2_t __a)
4043 {
4044 return vgetq_lane_s64 (__a, 0);
4045 }
4046
4047 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4048 vget_low_u8 (uint8x16_t __a)
4049 {
4050 __GET_LOW (u8);
4051 }
4052
4053 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4054 vget_low_u16 (uint16x8_t __a)
4055 {
4056 __GET_LOW (u16);
4057 }
4058
4059 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4060 vget_low_u32 (uint32x4_t __a)
4061 {
4062 __GET_LOW (u32);
4063 }
4064
4065 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4066 vget_low_u64 (uint64x2_t __a)
4067 {
4068 return vgetq_lane_u64 (__a, 0);
4069 }
4070
4071 #undef __GET_LOW
4072
4073 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4074 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4075 {
4076 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4077 }
4078
4079 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4080 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4081 {
4082 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4083 }
4084
4085 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4086 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4087 {
4088 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4089 }
4090
4091 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4092 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4093 {
4094 return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
4095 }
4096
4097 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4098 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4099 {
4100 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4101 }
4102
4103 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4104 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4105 {
4106 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4107 (int8x8_t) __b);
4108 }
4109
4110 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4111 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4112 {
4113 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4114 (int16x4_t) __b);
4115 }
4116
4117 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4118 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4119 {
4120 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4121 (int32x2_t) __b);
4122 }
4123
4124 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4125 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4126 {
4127 return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
4128 (int64x1_t) __b);
4129 }
4130
4131 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4132 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4133 {
4134 return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
4135 }
4136
4137 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4138 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4139 {
4140 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4141 (int8x8_t) __b);
4142 }
4143
4144 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4145 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4146 {
4147 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4148 (int16x4_t) __b);
4149 }
4150
4151 /* Start of temporary inline asm implementations. */
4152
4153 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4154 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4155 {
4156 int8x8_t result;
4157 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4158 : "=w"(result)
4159 : "0"(a), "w"(b), "w"(c)
4160 : /* No clobbers */);
4161 return result;
4162 }
4163
4164 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4165 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4166 {
4167 int16x4_t result;
4168 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4169 : "=w"(result)
4170 : "0"(a), "w"(b), "w"(c)
4171 : /* No clobbers */);
4172 return result;
4173 }
4174
4175 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4176 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4177 {
4178 int32x2_t result;
4179 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4180 : "=w"(result)
4181 : "0"(a), "w"(b), "w"(c)
4182 : /* No clobbers */);
4183 return result;
4184 }
4185
4186 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4187 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4188 {
4189 uint8x8_t result;
4190 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4191 : "=w"(result)
4192 : "0"(a), "w"(b), "w"(c)
4193 : /* No clobbers */);
4194 return result;
4195 }
4196
4197 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4198 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4199 {
4200 uint16x4_t result;
4201 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4202 : "=w"(result)
4203 : "0"(a), "w"(b), "w"(c)
4204 : /* No clobbers */);
4205 return result;
4206 }
4207
4208 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4209 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4210 {
4211 uint32x2_t result;
4212 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4213 : "=w"(result)
4214 : "0"(a), "w"(b), "w"(c)
4215 : /* No clobbers */);
4216 return result;
4217 }
4218
4219 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4220 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4221 {
4222 int16x8_t result;
4223 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4224 : "=w"(result)
4225 : "0"(a), "w"(b), "w"(c)
4226 : /* No clobbers */);
4227 return result;
4228 }
4229
4230 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4231 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4232 {
4233 int32x4_t result;
4234 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4235 : "=w"(result)
4236 : "0"(a), "w"(b), "w"(c)
4237 : /* No clobbers */);
4238 return result;
4239 }
4240
4241 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4242 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4243 {
4244 int64x2_t result;
4245 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4246 : "=w"(result)
4247 : "0"(a), "w"(b), "w"(c)
4248 : /* No clobbers */);
4249 return result;
4250 }
4251
4252 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4253 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4254 {
4255 uint16x8_t result;
4256 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4257 : "=w"(result)
4258 : "0"(a), "w"(b), "w"(c)
4259 : /* No clobbers */);
4260 return result;
4261 }
4262
4263 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4264 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4265 {
4266 uint32x4_t result;
4267 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4268 : "=w"(result)
4269 : "0"(a), "w"(b), "w"(c)
4270 : /* No clobbers */);
4271 return result;
4272 }
4273
4274 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4275 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4276 {
4277 uint64x2_t result;
4278 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4279 : "=w"(result)
4280 : "0"(a), "w"(b), "w"(c)
4281 : /* No clobbers */);
4282 return result;
4283 }
4284
4285 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4286 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4287 {
4288 int16x8_t result;
4289 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4290 : "=w"(result)
4291 : "0"(a), "w"(b), "w"(c)
4292 : /* No clobbers */);
4293 return result;
4294 }
4295
4296 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4297 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4298 {
4299 int32x4_t result;
4300 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4301 : "=w"(result)
4302 : "0"(a), "w"(b), "w"(c)
4303 : /* No clobbers */);
4304 return result;
4305 }
4306
4307 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4308 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4309 {
4310 int64x2_t result;
4311 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4312 : "=w"(result)
4313 : "0"(a), "w"(b), "w"(c)
4314 : /* No clobbers */);
4315 return result;
4316 }
4317
4318 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4319 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4320 {
4321 uint16x8_t result;
4322 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4323 : "=w"(result)
4324 : "0"(a), "w"(b), "w"(c)
4325 : /* No clobbers */);
4326 return result;
4327 }
4328
4329 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4330 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4331 {
4332 uint32x4_t result;
4333 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4334 : "=w"(result)
4335 : "0"(a), "w"(b), "w"(c)
4336 : /* No clobbers */);
4337 return result;
4338 }
4339
4340 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4341 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4342 {
4343 uint64x2_t result;
4344 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4345 : "=w"(result)
4346 : "0"(a), "w"(b), "w"(c)
4347 : /* No clobbers */);
4348 return result;
4349 }
4350
4351 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4352 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4353 {
4354 int8x16_t result;
4355 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4356 : "=w"(result)
4357 : "0"(a), "w"(b), "w"(c)
4358 : /* No clobbers */);
4359 return result;
4360 }
4361
4362 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4363 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4364 {
4365 int16x8_t result;
4366 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4367 : "=w"(result)
4368 : "0"(a), "w"(b), "w"(c)
4369 : /* No clobbers */);
4370 return result;
4371 }
4372
4373 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4374 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4375 {
4376 int32x4_t result;
4377 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4378 : "=w"(result)
4379 : "0"(a), "w"(b), "w"(c)
4380 : /* No clobbers */);
4381 return result;
4382 }
4383
4384 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4385 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4386 {
4387 uint8x16_t result;
4388 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4389 : "=w"(result)
4390 : "0"(a), "w"(b), "w"(c)
4391 : /* No clobbers */);
4392 return result;
4393 }
4394
4395 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4396 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4397 {
4398 uint16x8_t result;
4399 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4400 : "=w"(result)
4401 : "0"(a), "w"(b), "w"(c)
4402 : /* No clobbers */);
4403 return result;
4404 }
4405
4406 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4407 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4408 {
4409 uint32x4_t result;
4410 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4411 : "=w"(result)
4412 : "0"(a), "w"(b), "w"(c)
4413 : /* No clobbers */);
4414 return result;
4415 }
4416
4417 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4418 vabd_f32 (float32x2_t a, float32x2_t b)
4419 {
4420 float32x2_t result;
4421 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4422 : "=w"(result)
4423 : "w"(a), "w"(b)
4424 : /* No clobbers */);
4425 return result;
4426 }
4427
4428 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4429 vabd_s8 (int8x8_t a, int8x8_t b)
4430 {
4431 int8x8_t result;
4432 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4433 : "=w"(result)
4434 : "w"(a), "w"(b)
4435 : /* No clobbers */);
4436 return result;
4437 }
4438
4439 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4440 vabd_s16 (int16x4_t a, int16x4_t b)
4441 {
4442 int16x4_t result;
4443 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4444 : "=w"(result)
4445 : "w"(a), "w"(b)
4446 : /* No clobbers */);
4447 return result;
4448 }
4449
4450 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4451 vabd_s32 (int32x2_t a, int32x2_t b)
4452 {
4453 int32x2_t result;
4454 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4455 : "=w"(result)
4456 : "w"(a), "w"(b)
4457 : /* No clobbers */);
4458 return result;
4459 }
4460
4461 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4462 vabd_u8 (uint8x8_t a, uint8x8_t b)
4463 {
4464 uint8x8_t result;
4465 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4466 : "=w"(result)
4467 : "w"(a), "w"(b)
4468 : /* No clobbers */);
4469 return result;
4470 }
4471
4472 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4473 vabd_u16 (uint16x4_t a, uint16x4_t b)
4474 {
4475 uint16x4_t result;
4476 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4477 : "=w"(result)
4478 : "w"(a), "w"(b)
4479 : /* No clobbers */);
4480 return result;
4481 }
4482
4483 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4484 vabd_u32 (uint32x2_t a, uint32x2_t b)
4485 {
4486 uint32x2_t result;
4487 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4488 : "=w"(result)
4489 : "w"(a), "w"(b)
4490 : /* No clobbers */);
4491 return result;
4492 }
4493
4494 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4495 vabdd_f64 (float64_t a, float64_t b)
4496 {
4497 float64_t result;
4498 __asm__ ("fabd %d0, %d1, %d2"
4499 : "=w"(result)
4500 : "w"(a), "w"(b)
4501 : /* No clobbers */);
4502 return result;
4503 }
4504
4505 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4506 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4507 {
4508 int16x8_t result;
4509 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4510 : "=w"(result)
4511 : "w"(a), "w"(b)
4512 : /* No clobbers */);
4513 return result;
4514 }
4515
4516 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4517 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4518 {
4519 int32x4_t result;
4520 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4521 : "=w"(result)
4522 : "w"(a), "w"(b)
4523 : /* No clobbers */);
4524 return result;
4525 }
4526
4527 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4528 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4529 {
4530 int64x2_t result;
4531 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4532 : "=w"(result)
4533 : "w"(a), "w"(b)
4534 : /* No clobbers */);
4535 return result;
4536 }
4537
4538 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4539 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4540 {
4541 uint16x8_t result;
4542 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4543 : "=w"(result)
4544 : "w"(a), "w"(b)
4545 : /* No clobbers */);
4546 return result;
4547 }
4548
4549 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4550 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4551 {
4552 uint32x4_t result;
4553 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4554 : "=w"(result)
4555 : "w"(a), "w"(b)
4556 : /* No clobbers */);
4557 return result;
4558 }
4559
4560 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4561 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4562 {
4563 uint64x2_t result;
4564 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4565 : "=w"(result)
4566 : "w"(a), "w"(b)
4567 : /* No clobbers */);
4568 return result;
4569 }
4570
4571 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4572 vabdl_s8 (int8x8_t a, int8x8_t b)
4573 {
4574 int16x8_t result;
4575 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4576 : "=w"(result)
4577 : "w"(a), "w"(b)
4578 : /* No clobbers */);
4579 return result;
4580 }
4581
4582 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4583 vabdl_s16 (int16x4_t a, int16x4_t b)
4584 {
4585 int32x4_t result;
4586 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4587 : "=w"(result)
4588 : "w"(a), "w"(b)
4589 : /* No clobbers */);
4590 return result;
4591 }
4592
4593 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4594 vabdl_s32 (int32x2_t a, int32x2_t b)
4595 {
4596 int64x2_t result;
4597 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4598 : "=w"(result)
4599 : "w"(a), "w"(b)
4600 : /* No clobbers */);
4601 return result;
4602 }
4603
4604 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4605 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4606 {
4607 uint16x8_t result;
4608 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4609 : "=w"(result)
4610 : "w"(a), "w"(b)
4611 : /* No clobbers */);
4612 return result;
4613 }
4614
4615 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4616 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4617 {
4618 uint32x4_t result;
4619 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4620 : "=w"(result)
4621 : "w"(a), "w"(b)
4622 : /* No clobbers */);
4623 return result;
4624 }
4625
4626 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4627 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4628 {
4629 uint64x2_t result;
4630 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4631 : "=w"(result)
4632 : "w"(a), "w"(b)
4633 : /* No clobbers */);
4634 return result;
4635 }
4636
4637 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4638 vabdq_f32 (float32x4_t a, float32x4_t b)
4639 {
4640 float32x4_t result;
4641 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4642 : "=w"(result)
4643 : "w"(a), "w"(b)
4644 : /* No clobbers */);
4645 return result;
4646 }
4647
4648 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4649 vabdq_f64 (float64x2_t a, float64x2_t b)
4650 {
4651 float64x2_t result;
4652 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4653 : "=w"(result)
4654 : "w"(a), "w"(b)
4655 : /* No clobbers */);
4656 return result;
4657 }
4658
4659 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4660 vabdq_s8 (int8x16_t a, int8x16_t b)
4661 {
4662 int8x16_t result;
4663 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4664 : "=w"(result)
4665 : "w"(a), "w"(b)
4666 : /* No clobbers */);
4667 return result;
4668 }
4669
4670 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4671 vabdq_s16 (int16x8_t a, int16x8_t b)
4672 {
4673 int16x8_t result;
4674 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4675 : "=w"(result)
4676 : "w"(a), "w"(b)
4677 : /* No clobbers */);
4678 return result;
4679 }
4680
4681 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4682 vabdq_s32 (int32x4_t a, int32x4_t b)
4683 {
4684 int32x4_t result;
4685 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4686 : "=w"(result)
4687 : "w"(a), "w"(b)
4688 : /* No clobbers */);
4689 return result;
4690 }
4691
4692 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4693 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4694 {
4695 uint8x16_t result;
4696 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4697 : "=w"(result)
4698 : "w"(a), "w"(b)
4699 : /* No clobbers */);
4700 return result;
4701 }
4702
4703 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4704 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4705 {
4706 uint16x8_t result;
4707 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4708 : "=w"(result)
4709 : "w"(a), "w"(b)
4710 : /* No clobbers */);
4711 return result;
4712 }
4713
4714 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4715 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4716 {
4717 uint32x4_t result;
4718 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4719 : "=w"(result)
4720 : "w"(a), "w"(b)
4721 : /* No clobbers */);
4722 return result;
4723 }
4724
4725 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4726 vabds_f32 (float32_t a, float32_t b)
4727 {
4728 float32_t result;
4729 __asm__ ("fabd %s0, %s1, %s2"
4730 : "=w"(result)
4731 : "w"(a), "w"(b)
4732 : /* No clobbers */);
4733 return result;
4734 }
4735
4736 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4737 vaddlv_s8 (int8x8_t a)
4738 {
4739 int16_t result;
4740 __asm__ ("saddlv %h0,%1.8b"
4741 : "=w"(result)
4742 : "w"(a)
4743 : /* No clobbers */);
4744 return result;
4745 }
4746
4747 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4748 vaddlv_s16 (int16x4_t a)
4749 {
4750 int32_t result;
4751 __asm__ ("saddlv %s0,%1.4h"
4752 : "=w"(result)
4753 : "w"(a)
4754 : /* No clobbers */);
4755 return result;
4756 }
4757
4758 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4759 vaddlv_u8 (uint8x8_t a)
4760 {
4761 uint16_t result;
4762 __asm__ ("uaddlv %h0,%1.8b"
4763 : "=w"(result)
4764 : "w"(a)
4765 : /* No clobbers */);
4766 return result;
4767 }
4768
4769 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4770 vaddlv_u16 (uint16x4_t a)
4771 {
4772 uint32_t result;
4773 __asm__ ("uaddlv %s0,%1.4h"
4774 : "=w"(result)
4775 : "w"(a)
4776 : /* No clobbers */);
4777 return result;
4778 }
4779
4780 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4781 vaddlvq_s8 (int8x16_t a)
4782 {
4783 int16_t result;
4784 __asm__ ("saddlv %h0,%1.16b"
4785 : "=w"(result)
4786 : "w"(a)
4787 : /* No clobbers */);
4788 return result;
4789 }
4790
4791 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4792 vaddlvq_s16 (int16x8_t a)
4793 {
4794 int32_t result;
4795 __asm__ ("saddlv %s0,%1.8h"
4796 : "=w"(result)
4797 : "w"(a)
4798 : /* No clobbers */);
4799 return result;
4800 }
4801
4802 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
4803 vaddlvq_s32 (int32x4_t a)
4804 {
4805 int64_t result;
4806 __asm__ ("saddlv %d0,%1.4s"
4807 : "=w"(result)
4808 : "w"(a)
4809 : /* No clobbers */);
4810 return result;
4811 }
4812
4813 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4814 vaddlvq_u8 (uint8x16_t a)
4815 {
4816 uint16_t result;
4817 __asm__ ("uaddlv %h0,%1.16b"
4818 : "=w"(result)
4819 : "w"(a)
4820 : /* No clobbers */);
4821 return result;
4822 }
4823
4824 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4825 vaddlvq_u16 (uint16x8_t a)
4826 {
4827 uint32_t result;
4828 __asm__ ("uaddlv %s0,%1.8h"
4829 : "=w"(result)
4830 : "w"(a)
4831 : /* No clobbers */);
4832 return result;
4833 }
4834
4835 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
4836 vaddlvq_u32 (uint32x4_t a)
4837 {
4838 uint64_t result;
4839 __asm__ ("uaddlv %d0,%1.4s"
4840 : "=w"(result)
4841 : "w"(a)
4842 : /* No clobbers */);
4843 return result;
4844 }
4845
4846 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4847 vcls_s8 (int8x8_t a)
4848 {
4849 int8x8_t result;
4850 __asm__ ("cls %0.8b,%1.8b"
4851 : "=w"(result)
4852 : "w"(a)
4853 : /* No clobbers */);
4854 return result;
4855 }
4856
4857 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4858 vcls_s16 (int16x4_t a)
4859 {
4860 int16x4_t result;
4861 __asm__ ("cls %0.4h,%1.4h"
4862 : "=w"(result)
4863 : "w"(a)
4864 : /* No clobbers */);
4865 return result;
4866 }
4867
4868 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4869 vcls_s32 (int32x2_t a)
4870 {
4871 int32x2_t result;
4872 __asm__ ("cls %0.2s,%1.2s"
4873 : "=w"(result)
4874 : "w"(a)
4875 : /* No clobbers */);
4876 return result;
4877 }
4878
4879 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4880 vclsq_s8 (int8x16_t a)
4881 {
4882 int8x16_t result;
4883 __asm__ ("cls %0.16b,%1.16b"
4884 : "=w"(result)
4885 : "w"(a)
4886 : /* No clobbers */);
4887 return result;
4888 }
4889
4890 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4891 vclsq_s16 (int16x8_t a)
4892 {
4893 int16x8_t result;
4894 __asm__ ("cls %0.8h,%1.8h"
4895 : "=w"(result)
4896 : "w"(a)
4897 : /* No clobbers */);
4898 return result;
4899 }
4900
4901 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4902 vclsq_s32 (int32x4_t a)
4903 {
4904 int32x4_t result;
4905 __asm__ ("cls %0.4s,%1.4s"
4906 : "=w"(result)
4907 : "w"(a)
4908 : /* No clobbers */);
4909 return result;
4910 }
4911
4912 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4913 vcnt_p8 (poly8x8_t a)
4914 {
4915 poly8x8_t result;
4916 __asm__ ("cnt %0.8b,%1.8b"
4917 : "=w"(result)
4918 : "w"(a)
4919 : /* No clobbers */);
4920 return result;
4921 }
4922
4923 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4924 vcnt_s8 (int8x8_t a)
4925 {
4926 int8x8_t result;
4927 __asm__ ("cnt %0.8b,%1.8b"
4928 : "=w"(result)
4929 : "w"(a)
4930 : /* No clobbers */);
4931 return result;
4932 }
4933
4934 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4935 vcnt_u8 (uint8x8_t a)
4936 {
4937 uint8x8_t result;
4938 __asm__ ("cnt %0.8b,%1.8b"
4939 : "=w"(result)
4940 : "w"(a)
4941 : /* No clobbers */);
4942 return result;
4943 }
4944
4945 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4946 vcntq_p8 (poly8x16_t a)
4947 {
4948 poly8x16_t result;
4949 __asm__ ("cnt %0.16b,%1.16b"
4950 : "=w"(result)
4951 : "w"(a)
4952 : /* No clobbers */);
4953 return result;
4954 }
4955
4956 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4957 vcntq_s8 (int8x16_t a)
4958 {
4959 int8x16_t result;
4960 __asm__ ("cnt %0.16b,%1.16b"
4961 : "=w"(result)
4962 : "w"(a)
4963 : /* No clobbers */);
4964 return result;
4965 }
4966
4967 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4968 vcntq_u8 (uint8x16_t a)
4969 {
4970 uint8x16_t result;
4971 __asm__ ("cnt %0.16b,%1.16b"
4972 : "=w"(result)
4973 : "w"(a)
4974 : /* No clobbers */);
4975 return result;
4976 }
4977
4978 #define vcopyq_lane_f32(a, b, c, d) \
4979 __extension__ \
4980 ({ \
4981 float32x4_t c_ = (c); \
4982 float32x4_t a_ = (a); \
4983 float32x4_t result; \
4984 __asm__ ("ins %0.s[%2], %3.s[%4]" \
4985 : "=w"(result) \
4986 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
4987 : /* No clobbers */); \
4988 result; \
4989 })
4990
4991 #define vcopyq_lane_f64(a, b, c, d) \
4992 __extension__ \
4993 ({ \
4994 float64x2_t c_ = (c); \
4995 float64x2_t a_ = (a); \
4996 float64x2_t result; \
4997 __asm__ ("ins %0.d[%2], %3.d[%4]" \
4998 : "=w"(result) \
4999 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5000 : /* No clobbers */); \
5001 result; \
5002 })
5003
5004 #define vcopyq_lane_p8(a, b, c, d) \
5005 __extension__ \
5006 ({ \
5007 poly8x16_t c_ = (c); \
5008 poly8x16_t a_ = (a); \
5009 poly8x16_t result; \
5010 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5011 : "=w"(result) \
5012 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5013 : /* No clobbers */); \
5014 result; \
5015 })
5016
5017 #define vcopyq_lane_p16(a, b, c, d) \
5018 __extension__ \
5019 ({ \
5020 poly16x8_t c_ = (c); \
5021 poly16x8_t a_ = (a); \
5022 poly16x8_t result; \
5023 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5024 : "=w"(result) \
5025 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5026 : /* No clobbers */); \
5027 result; \
5028 })
5029
5030 #define vcopyq_lane_s8(a, b, c, d) \
5031 __extension__ \
5032 ({ \
5033 int8x16_t c_ = (c); \
5034 int8x16_t a_ = (a); \
5035 int8x16_t result; \
5036 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5037 : "=w"(result) \
5038 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5039 : /* No clobbers */); \
5040 result; \
5041 })
5042
5043 #define vcopyq_lane_s16(a, b, c, d) \
5044 __extension__ \
5045 ({ \
5046 int16x8_t c_ = (c); \
5047 int16x8_t a_ = (a); \
5048 int16x8_t result; \
5049 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5050 : "=w"(result) \
5051 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5052 : /* No clobbers */); \
5053 result; \
5054 })
5055
5056 #define vcopyq_lane_s32(a, b, c, d) \
5057 __extension__ \
5058 ({ \
5059 int32x4_t c_ = (c); \
5060 int32x4_t a_ = (a); \
5061 int32x4_t result; \
5062 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5063 : "=w"(result) \
5064 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5065 : /* No clobbers */); \
5066 result; \
5067 })
5068
5069 #define vcopyq_lane_s64(a, b, c, d) \
5070 __extension__ \
5071 ({ \
5072 int64x2_t c_ = (c); \
5073 int64x2_t a_ = (a); \
5074 int64x2_t result; \
5075 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5076 : "=w"(result) \
5077 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5078 : /* No clobbers */); \
5079 result; \
5080 })
5081
5082 #define vcopyq_lane_u8(a, b, c, d) \
5083 __extension__ \
5084 ({ \
5085 uint8x16_t c_ = (c); \
5086 uint8x16_t a_ = (a); \
5087 uint8x16_t result; \
5088 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5089 : "=w"(result) \
5090 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5091 : /* No clobbers */); \
5092 result; \
5093 })
5094
5095 #define vcopyq_lane_u16(a, b, c, d) \
5096 __extension__ \
5097 ({ \
5098 uint16x8_t c_ = (c); \
5099 uint16x8_t a_ = (a); \
5100 uint16x8_t result; \
5101 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5102 : "=w"(result) \
5103 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5104 : /* No clobbers */); \
5105 result; \
5106 })
5107
5108 #define vcopyq_lane_u32(a, b, c, d) \
5109 __extension__ \
5110 ({ \
5111 uint32x4_t c_ = (c); \
5112 uint32x4_t a_ = (a); \
5113 uint32x4_t result; \
5114 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5115 : "=w"(result) \
5116 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5117 : /* No clobbers */); \
5118 result; \
5119 })
5120
5121 #define vcopyq_lane_u64(a, b, c, d) \
5122 __extension__ \
5123 ({ \
5124 uint64x2_t c_ = (c); \
5125 uint64x2_t a_ = (a); \
5126 uint64x2_t result; \
5127 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5128 : "=w"(result) \
5129 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5130 : /* No clobbers */); \
5131 result; \
5132 })
5133
5134 /* vcvt_f16_f32 not supported */
5135
5136 /* vcvt_f32_f16 not supported */
5137
5138 /* vcvt_high_f16_f32 not supported */
5139
5140 /* vcvt_high_f32_f16 not supported */
5141
5142 static float32x2_t vdup_n_f32 (float32_t);
5143
5144 #define vcvt_n_f32_s32(a, b) \
5145 __extension__ \
5146 ({ \
5147 int32x2_t a_ = (a); \
5148 float32x2_t result; \
5149 __asm__ ("scvtf %0.2s, %1.2s, #%2" \
5150 : "=w"(result) \
5151 : "w"(a_), "i"(b) \
5152 : /* No clobbers */); \
5153 result; \
5154 })
5155
5156 #define vcvt_n_f32_u32(a, b) \
5157 __extension__ \
5158 ({ \
5159 uint32x2_t a_ = (a); \
5160 float32x2_t result; \
5161 __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
5162 : "=w"(result) \
5163 : "w"(a_), "i"(b) \
5164 : /* No clobbers */); \
5165 result; \
5166 })
5167
5168 #define vcvt_n_s32_f32(a, b) \
5169 __extension__ \
5170 ({ \
5171 float32x2_t a_ = (a); \
5172 int32x2_t result; \
5173 __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
5174 : "=w"(result) \
5175 : "w"(a_), "i"(b) \
5176 : /* No clobbers */); \
5177 result; \
5178 })
5179
5180 #define vcvt_n_u32_f32(a, b) \
5181 __extension__ \
5182 ({ \
5183 float32x2_t a_ = (a); \
5184 uint32x2_t result; \
5185 __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
5186 : "=w"(result) \
5187 : "w"(a_), "i"(b) \
5188 : /* No clobbers */); \
5189 result; \
5190 })
5191
5192 #define vcvtd_n_f64_s64(a, b) \
5193 __extension__ \
5194 ({ \
5195 int64_t a_ = (a); \
5196 float64_t result; \
5197 __asm__ ("scvtf %d0,%d1,%2" \
5198 : "=w"(result) \
5199 : "w"(a_), "i"(b) \
5200 : /* No clobbers */); \
5201 result; \
5202 })
5203
5204 #define vcvtd_n_f64_u64(a, b) \
5205 __extension__ \
5206 ({ \
5207 uint64_t a_ = (a); \
5208 float64_t result; \
5209 __asm__ ("ucvtf %d0,%d1,%2" \
5210 : "=w"(result) \
5211 : "w"(a_), "i"(b) \
5212 : /* No clobbers */); \
5213 result; \
5214 })
5215
5216 #define vcvtd_n_s64_f64(a, b) \
5217 __extension__ \
5218 ({ \
5219 float64_t a_ = (a); \
5220 int64_t result; \
5221 __asm__ ("fcvtzs %d0,%d1,%2" \
5222 : "=w"(result) \
5223 : "w"(a_), "i"(b) \
5224 : /* No clobbers */); \
5225 result; \
5226 })
5227
5228 #define vcvtd_n_u64_f64(a, b) \
5229 __extension__ \
5230 ({ \
5231 float64_t a_ = (a); \
5232 uint64_t result; \
5233 __asm__ ("fcvtzu %d0,%d1,%2" \
5234 : "=w"(result) \
5235 : "w"(a_), "i"(b) \
5236 : /* No clobbers */); \
5237 result; \
5238 })
5239
5240 #define vcvtq_n_f32_s32(a, b) \
5241 __extension__ \
5242 ({ \
5243 int32x4_t a_ = (a); \
5244 float32x4_t result; \
5245 __asm__ ("scvtf %0.4s, %1.4s, #%2" \
5246 : "=w"(result) \
5247 : "w"(a_), "i"(b) \
5248 : /* No clobbers */); \
5249 result; \
5250 })
5251
5252 #define vcvtq_n_f32_u32(a, b) \
5253 __extension__ \
5254 ({ \
5255 uint32x4_t a_ = (a); \
5256 float32x4_t result; \
5257 __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
5258 : "=w"(result) \
5259 : "w"(a_), "i"(b) \
5260 : /* No clobbers */); \
5261 result; \
5262 })
5263
5264 #define vcvtq_n_f64_s64(a, b) \
5265 __extension__ \
5266 ({ \
5267 int64x2_t a_ = (a); \
5268 float64x2_t result; \
5269 __asm__ ("scvtf %0.2d, %1.2d, #%2" \
5270 : "=w"(result) \
5271 : "w"(a_), "i"(b) \
5272 : /* No clobbers */); \
5273 result; \
5274 })
5275
5276 #define vcvtq_n_f64_u64(a, b) \
5277 __extension__ \
5278 ({ \
5279 uint64x2_t a_ = (a); \
5280 float64x2_t result; \
5281 __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
5282 : "=w"(result) \
5283 : "w"(a_), "i"(b) \
5284 : /* No clobbers */); \
5285 result; \
5286 })
5287
5288 #define vcvtq_n_s32_f32(a, b) \
5289 __extension__ \
5290 ({ \
5291 float32x4_t a_ = (a); \
5292 int32x4_t result; \
5293 __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
5294 : "=w"(result) \
5295 : "w"(a_), "i"(b) \
5296 : /* No clobbers */); \
5297 result; \
5298 })
5299
5300 #define vcvtq_n_s64_f64(a, b) \
5301 __extension__ \
5302 ({ \
5303 float64x2_t a_ = (a); \
5304 int64x2_t result; \
5305 __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
5306 : "=w"(result) \
5307 : "w"(a_), "i"(b) \
5308 : /* No clobbers */); \
5309 result; \
5310 })
5311
5312 #define vcvtq_n_u32_f32(a, b) \
5313 __extension__ \
5314 ({ \
5315 float32x4_t a_ = (a); \
5316 uint32x4_t result; \
5317 __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
5318 : "=w"(result) \
5319 : "w"(a_), "i"(b) \
5320 : /* No clobbers */); \
5321 result; \
5322 })
5323
5324 #define vcvtq_n_u64_f64(a, b) \
5325 __extension__ \
5326 ({ \
5327 float64x2_t a_ = (a); \
5328 uint64x2_t result; \
5329 __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
5330 : "=w"(result) \
5331 : "w"(a_), "i"(b) \
5332 : /* No clobbers */); \
5333 result; \
5334 })
5335
5336 #define vcvts_n_f32_s32(a, b) \
5337 __extension__ \
5338 ({ \
5339 int32_t a_ = (a); \
5340 float32_t result; \
5341 __asm__ ("scvtf %s0,%s1,%2" \
5342 : "=w"(result) \
5343 : "w"(a_), "i"(b) \
5344 : /* No clobbers */); \
5345 result; \
5346 })
5347
5348 #define vcvts_n_f32_u32(a, b) \
5349 __extension__ \
5350 ({ \
5351 uint32_t a_ = (a); \
5352 float32_t result; \
5353 __asm__ ("ucvtf %s0,%s1,%2" \
5354 : "=w"(result) \
5355 : "w"(a_), "i"(b) \
5356 : /* No clobbers */); \
5357 result; \
5358 })
5359
5360 #define vcvts_n_s32_f32(a, b) \
5361 __extension__ \
5362 ({ \
5363 float32_t a_ = (a); \
5364 int32_t result; \
5365 __asm__ ("fcvtzs %s0,%s1,%2" \
5366 : "=w"(result) \
5367 : "w"(a_), "i"(b) \
5368 : /* No clobbers */); \
5369 result; \
5370 })
5371
5372 #define vcvts_n_u32_f32(a, b) \
5373 __extension__ \
5374 ({ \
5375 float32_t a_ = (a); \
5376 uint32_t result; \
5377 __asm__ ("fcvtzu %s0,%s1,%2" \
5378 : "=w"(result) \
5379 : "w"(a_), "i"(b) \
5380 : /* No clobbers */); \
5381 result; \
5382 })
5383
5384 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5385 vcvtx_f32_f64 (float64x2_t a)
5386 {
5387 float32x2_t result;
5388 __asm__ ("fcvtxn %0.2s,%1.2d"
5389 : "=w"(result)
5390 : "w"(a)
5391 : /* No clobbers */);
5392 return result;
5393 }
5394
5395 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5396 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5397 {
5398 float32x4_t result;
5399 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5400 : "=w"(result)
5401 : "w" (b), "0"(a)
5402 : /* No clobbers */);
5403 return result;
5404 }
5405
5406 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5407 vcvtxd_f32_f64 (float64_t a)
5408 {
5409 float32_t result;
5410 __asm__ ("fcvtxn %s0,%d1"
5411 : "=w"(result)
5412 : "w"(a)
5413 : /* No clobbers */);
5414 return result;
5415 }
5416
5417 #define vext_f32(a, b, c) \
5418 __extension__ \
5419 ({ \
5420 float32x2_t b_ = (b); \
5421 float32x2_t a_ = (a); \
5422 float32x2_t result; \
5423 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
5424 : "=w"(result) \
5425 : "w"(a_), "w"(b_), "i"(c) \
5426 : /* No clobbers */); \
5427 result; \
5428 })
5429
5430 #define vext_f64(a, b, c) \
5431 __extension__ \
5432 ({ \
5433 float64x1_t b_ = (b); \
5434 float64x1_t a_ = (a); \
5435 float64x1_t result; \
5436 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
5437 : "=w"(result) \
5438 : "w"(a_), "w"(b_), "i"(c) \
5439 : /* No clobbers */); \
5440 result; \
5441 })
5442
5443 #define vext_p8(a, b, c) \
5444 __extension__ \
5445 ({ \
5446 poly8x8_t b_ = (b); \
5447 poly8x8_t a_ = (a); \
5448 poly8x8_t result; \
5449 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
5450 : "=w"(result) \
5451 : "w"(a_), "w"(b_), "i"(c) \
5452 : /* No clobbers */); \
5453 result; \
5454 })
5455
5456 #define vext_p16(a, b, c) \
5457 __extension__ \
5458 ({ \
5459 poly16x4_t b_ = (b); \
5460 poly16x4_t a_ = (a); \
5461 poly16x4_t result; \
5462 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
5463 : "=w"(result) \
5464 : "w"(a_), "w"(b_), "i"(c) \
5465 : /* No clobbers */); \
5466 result; \
5467 })
5468
5469 #define vext_s8(a, b, c) \
5470 __extension__ \
5471 ({ \
5472 int8x8_t b_ = (b); \
5473 int8x8_t a_ = (a); \
5474 int8x8_t result; \
5475 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
5476 : "=w"(result) \
5477 : "w"(a_), "w"(b_), "i"(c) \
5478 : /* No clobbers */); \
5479 result; \
5480 })
5481
5482 #define vext_s16(a, b, c) \
5483 __extension__ \
5484 ({ \
5485 int16x4_t b_ = (b); \
5486 int16x4_t a_ = (a); \
5487 int16x4_t result; \
5488 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
5489 : "=w"(result) \
5490 : "w"(a_), "w"(b_), "i"(c) \
5491 : /* No clobbers */); \
5492 result; \
5493 })
5494
5495 #define vext_s32(a, b, c) \
5496 __extension__ \
5497 ({ \
5498 int32x2_t b_ = (b); \
5499 int32x2_t a_ = (a); \
5500 int32x2_t result; \
5501 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
5502 : "=w"(result) \
5503 : "w"(a_), "w"(b_), "i"(c) \
5504 : /* No clobbers */); \
5505 result; \
5506 })
5507
5508 #define vext_s64(a, b, c) \
5509 __extension__ \
5510 ({ \
5511 int64x1_t b_ = (b); \
5512 int64x1_t a_ = (a); \
5513 int64x1_t result; \
5514 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
5515 : "=w"(result) \
5516 : "w"(a_), "w"(b_), "i"(c) \
5517 : /* No clobbers */); \
5518 result; \
5519 })
5520
5521 #define vext_u8(a, b, c) \
5522 __extension__ \
5523 ({ \
5524 uint8x8_t b_ = (b); \
5525 uint8x8_t a_ = (a); \
5526 uint8x8_t result; \
5527 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
5528 : "=w"(result) \
5529 : "w"(a_), "w"(b_), "i"(c) \
5530 : /* No clobbers */); \
5531 result; \
5532 })
5533
5534 #define vext_u16(a, b, c) \
5535 __extension__ \
5536 ({ \
5537 uint16x4_t b_ = (b); \
5538 uint16x4_t a_ = (a); \
5539 uint16x4_t result; \
5540 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
5541 : "=w"(result) \
5542 : "w"(a_), "w"(b_), "i"(c) \
5543 : /* No clobbers */); \
5544 result; \
5545 })
5546
5547 #define vext_u32(a, b, c) \
5548 __extension__ \
5549 ({ \
5550 uint32x2_t b_ = (b); \
5551 uint32x2_t a_ = (a); \
5552 uint32x2_t result; \
5553 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
5554 : "=w"(result) \
5555 : "w"(a_), "w"(b_), "i"(c) \
5556 : /* No clobbers */); \
5557 result; \
5558 })
5559
5560 #define vext_u64(a, b, c) \
5561 __extension__ \
5562 ({ \
5563 uint64x1_t b_ = (b); \
5564 uint64x1_t a_ = (a); \
5565 uint64x1_t result; \
5566 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
5567 : "=w"(result) \
5568 : "w"(a_), "w"(b_), "i"(c) \
5569 : /* No clobbers */); \
5570 result; \
5571 })
5572
5573 #define vextq_f32(a, b, c) \
5574 __extension__ \
5575 ({ \
5576 float32x4_t b_ = (b); \
5577 float32x4_t a_ = (a); \
5578 float32x4_t result; \
5579 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
5580 : "=w"(result) \
5581 : "w"(a_), "w"(b_), "i"(c) \
5582 : /* No clobbers */); \
5583 result; \
5584 })
5585
5586 #define vextq_f64(a, b, c) \
5587 __extension__ \
5588 ({ \
5589 float64x2_t b_ = (b); \
5590 float64x2_t a_ = (a); \
5591 float64x2_t result; \
5592 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
5593 : "=w"(result) \
5594 : "w"(a_), "w"(b_), "i"(c) \
5595 : /* No clobbers */); \
5596 result; \
5597 })
5598
5599 #define vextq_p8(a, b, c) \
5600 __extension__ \
5601 ({ \
5602 poly8x16_t b_ = (b); \
5603 poly8x16_t a_ = (a); \
5604 poly8x16_t result; \
5605 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
5606 : "=w"(result) \
5607 : "w"(a_), "w"(b_), "i"(c) \
5608 : /* No clobbers */); \
5609 result; \
5610 })
5611
5612 #define vextq_p16(a, b, c) \
5613 __extension__ \
5614 ({ \
5615 poly16x8_t b_ = (b); \
5616 poly16x8_t a_ = (a); \
5617 poly16x8_t result; \
5618 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
5619 : "=w"(result) \
5620 : "w"(a_), "w"(b_), "i"(c) \
5621 : /* No clobbers */); \
5622 result; \
5623 })
5624
5625 #define vextq_s8(a, b, c) \
5626 __extension__ \
5627 ({ \
5628 int8x16_t b_ = (b); \
5629 int8x16_t a_ = (a); \
5630 int8x16_t result; \
5631 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
5632 : "=w"(result) \
5633 : "w"(a_), "w"(b_), "i"(c) \
5634 : /* No clobbers */); \
5635 result; \
5636 })
5637
5638 #define vextq_s16(a, b, c) \
5639 __extension__ \
5640 ({ \
5641 int16x8_t b_ = (b); \
5642 int16x8_t a_ = (a); \
5643 int16x8_t result; \
5644 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
5645 : "=w"(result) \
5646 : "w"(a_), "w"(b_), "i"(c) \
5647 : /* No clobbers */); \
5648 result; \
5649 })
5650
5651 #define vextq_s32(a, b, c) \
5652 __extension__ \
5653 ({ \
5654 int32x4_t b_ = (b); \
5655 int32x4_t a_ = (a); \
5656 int32x4_t result; \
5657 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
5658 : "=w"(result) \
5659 : "w"(a_), "w"(b_), "i"(c) \
5660 : /* No clobbers */); \
5661 result; \
5662 })
5663
5664 #define vextq_s64(a, b, c) \
5665 __extension__ \
5666 ({ \
5667 int64x2_t b_ = (b); \
5668 int64x2_t a_ = (a); \
5669 int64x2_t result; \
5670 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
5671 : "=w"(result) \
5672 : "w"(a_), "w"(b_), "i"(c) \
5673 : /* No clobbers */); \
5674 result; \
5675 })
5676
5677 #define vextq_u8(a, b, c) \
5678 __extension__ \
5679 ({ \
5680 uint8x16_t b_ = (b); \
5681 uint8x16_t a_ = (a); \
5682 uint8x16_t result; \
5683 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
5684 : "=w"(result) \
5685 : "w"(a_), "w"(b_), "i"(c) \
5686 : /* No clobbers */); \
5687 result; \
5688 })
5689
5690 #define vextq_u16(a, b, c) \
5691 __extension__ \
5692 ({ \
5693 uint16x8_t b_ = (b); \
5694 uint16x8_t a_ = (a); \
5695 uint16x8_t result; \
5696 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
5697 : "=w"(result) \
5698 : "w"(a_), "w"(b_), "i"(c) \
5699 : /* No clobbers */); \
5700 result; \
5701 })
5702
5703 #define vextq_u32(a, b, c) \
5704 __extension__ \
5705 ({ \
5706 uint32x4_t b_ = (b); \
5707 uint32x4_t a_ = (a); \
5708 uint32x4_t result; \
5709 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
5710 : "=w"(result) \
5711 : "w"(a_), "w"(b_), "i"(c) \
5712 : /* No clobbers */); \
5713 result; \
5714 })
5715
5716 #define vextq_u64(a, b, c) \
5717 __extension__ \
5718 ({ \
5719 uint64x2_t b_ = (b); \
5720 uint64x2_t a_ = (a); \
5721 uint64x2_t result; \
5722 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
5723 : "=w"(result) \
5724 : "w"(a_), "w"(b_), "i"(c) \
5725 : /* No clobbers */); \
5726 result; \
5727 })
5728
5729 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5730 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5731 {
5732 float32x2_t result;
5733 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
5734 : "=w"(result)
5735 : "0"(a), "w"(b), "w"(c)
5736 : /* No clobbers */);
5737 return result;
5738 }
5739
5740 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5741 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5742 {
5743 float32x4_t result;
5744 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
5745 : "=w"(result)
5746 : "0"(a), "w"(b), "w"(c)
5747 : /* No clobbers */);
5748 return result;
5749 }
5750
5751 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5752 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5753 {
5754 float64x2_t result;
5755 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
5756 : "=w"(result)
5757 : "0"(a), "w"(b), "w"(c)
5758 : /* No clobbers */);
5759 return result;
5760 }
5761
5762 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5763 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5764 {
5765 float32x2_t result;
5766 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
5767 : "=w"(result)
5768 : "0"(a), "w"(b), "w"(c)
5769 : /* No clobbers */);
5770 return result;
5771 }
5772
5773 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5774 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
5775 {
5776 float32x4_t result;
5777 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
5778 : "=w"(result)
5779 : "0"(a), "w"(b), "w"(c)
5780 : /* No clobbers */);
5781 return result;
5782 }
5783
5784 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5785 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
5786 {
5787 float64x2_t result;
5788 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
5789 : "=w"(result)
5790 : "0"(a), "w"(b), "w"(c)
5791 : /* No clobbers */);
5792 return result;
5793 }
5794
5795 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5796 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5797 {
5798 float32x2_t result;
5799 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
5800 : "=w"(result)
5801 : "0"(a), "w"(b), "w"(c)
5802 : /* No clobbers */);
5803 return result;
5804 }
5805
5806 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5807 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5808 {
5809 float32x4_t result;
5810 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
5811 : "=w"(result)
5812 : "0"(a), "w"(b), "w"(c)
5813 : /* No clobbers */);
5814 return result;
5815 }
5816
5817 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5818 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5819 {
5820 float64x2_t result;
5821 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
5822 : "=w"(result)
5823 : "0"(a), "w"(b), "w"(c)
5824 : /* No clobbers */);
5825 return result;
5826 }
5827
5828 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5829 vget_high_f32 (float32x4_t a)
5830 {
5831 float32x2_t result;
5832 __asm__ ("ins %0.d[0], %1.d[1]"
5833 : "=w"(result)
5834 : "w"(a)
5835 : /* No clobbers */);
5836 return result;
5837 }
5838
5839 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5840 vget_high_f64 (float64x2_t a)
5841 {
5842 float64x1_t result;
5843 __asm__ ("ins %0.d[0], %1.d[1]"
5844 : "=w"(result)
5845 : "w"(a)
5846 : /* No clobbers */);
5847 return result;
5848 }
5849
5850 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5851 vget_high_p8 (poly8x16_t a)
5852 {
5853 poly8x8_t result;
5854 __asm__ ("ins %0.d[0], %1.d[1]"
5855 : "=w"(result)
5856 : "w"(a)
5857 : /* No clobbers */);
5858 return result;
5859 }
5860
5861 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5862 vget_high_p16 (poly16x8_t a)
5863 {
5864 poly16x4_t result;
5865 __asm__ ("ins %0.d[0], %1.d[1]"
5866 : "=w"(result)
5867 : "w"(a)
5868 : /* No clobbers */);
5869 return result;
5870 }
5871
5872 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5873 vget_high_s8 (int8x16_t a)
5874 {
5875 int8x8_t result;
5876 __asm__ ("ins %0.d[0], %1.d[1]"
5877 : "=w"(result)
5878 : "w"(a)
5879 : /* No clobbers */);
5880 return result;
5881 }
5882
5883 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5884 vget_high_s16 (int16x8_t a)
5885 {
5886 int16x4_t result;
5887 __asm__ ("ins %0.d[0], %1.d[1]"
5888 : "=w"(result)
5889 : "w"(a)
5890 : /* No clobbers */);
5891 return result;
5892 }
5893
5894 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5895 vget_high_s32 (int32x4_t a)
5896 {
5897 int32x2_t result;
5898 __asm__ ("ins %0.d[0], %1.d[1]"
5899 : "=w"(result)
5900 : "w"(a)
5901 : /* No clobbers */);
5902 return result;
5903 }
5904
5905 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5906 vget_high_s64 (int64x2_t a)
5907 {
5908 int64x1_t result;
5909 __asm__ ("ins %0.d[0], %1.d[1]"
5910 : "=w"(result)
5911 : "w"(a)
5912 : /* No clobbers */);
5913 return result;
5914 }
5915
5916 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5917 vget_high_u8 (uint8x16_t a)
5918 {
5919 uint8x8_t result;
5920 __asm__ ("ins %0.d[0], %1.d[1]"
5921 : "=w"(result)
5922 : "w"(a)
5923 : /* No clobbers */);
5924 return result;
5925 }
5926
5927 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5928 vget_high_u16 (uint16x8_t a)
5929 {
5930 uint16x4_t result;
5931 __asm__ ("ins %0.d[0], %1.d[1]"
5932 : "=w"(result)
5933 : "w"(a)
5934 : /* No clobbers */);
5935 return result;
5936 }
5937
5938 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5939 vget_high_u32 (uint32x4_t a)
5940 {
5941 uint32x2_t result;
5942 __asm__ ("ins %0.d[0], %1.d[1]"
5943 : "=w"(result)
5944 : "w"(a)
5945 : /* No clobbers */);
5946 return result;
5947 }
5948
5949 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5950 vget_high_u64 (uint64x2_t a)
5951 {
5952 uint64x1_t result;
5953 __asm__ ("ins %0.d[0], %1.d[1]"
5954 : "=w"(result)
5955 : "w"(a)
5956 : /* No clobbers */);
5957 return result;
5958 }
5959
5960 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5961 vhsub_s8 (int8x8_t a, int8x8_t b)
5962 {
5963 int8x8_t result;
5964 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
5965 : "=w"(result)
5966 : "w"(a), "w"(b)
5967 : /* No clobbers */);
5968 return result;
5969 }
5970
5971 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5972 vhsub_s16 (int16x4_t a, int16x4_t b)
5973 {
5974 int16x4_t result;
5975 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
5976 : "=w"(result)
5977 : "w"(a), "w"(b)
5978 : /* No clobbers */);
5979 return result;
5980 }
5981
5982 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5983 vhsub_s32 (int32x2_t a, int32x2_t b)
5984 {
5985 int32x2_t result;
5986 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
5987 : "=w"(result)
5988 : "w"(a), "w"(b)
5989 : /* No clobbers */);
5990 return result;
5991 }
5992
5993 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5994 vhsub_u8 (uint8x8_t a, uint8x8_t b)
5995 {
5996 uint8x8_t result;
5997 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
5998 : "=w"(result)
5999 : "w"(a), "w"(b)
6000 : /* No clobbers */);
6001 return result;
6002 }
6003
6004 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6005 vhsub_u16 (uint16x4_t a, uint16x4_t b)
6006 {
6007 uint16x4_t result;
6008 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6009 : "=w"(result)
6010 : "w"(a), "w"(b)
6011 : /* No clobbers */);
6012 return result;
6013 }
6014
6015 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6016 vhsub_u32 (uint32x2_t a, uint32x2_t b)
6017 {
6018 uint32x2_t result;
6019 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6020 : "=w"(result)
6021 : "w"(a), "w"(b)
6022 : /* No clobbers */);
6023 return result;
6024 }
6025
6026 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6027 vhsubq_s8 (int8x16_t a, int8x16_t b)
6028 {
6029 int8x16_t result;
6030 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6031 : "=w"(result)
6032 : "w"(a), "w"(b)
6033 : /* No clobbers */);
6034 return result;
6035 }
6036
6037 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6038 vhsubq_s16 (int16x8_t a, int16x8_t b)
6039 {
6040 int16x8_t result;
6041 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
6042 : "=w"(result)
6043 : "w"(a), "w"(b)
6044 : /* No clobbers */);
6045 return result;
6046 }
6047
6048 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6049 vhsubq_s32 (int32x4_t a, int32x4_t b)
6050 {
6051 int32x4_t result;
6052 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
6053 : "=w"(result)
6054 : "w"(a), "w"(b)
6055 : /* No clobbers */);
6056 return result;
6057 }
6058
6059 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6060 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6061 {
6062 uint8x16_t result;
6063 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6064 : "=w"(result)
6065 : "w"(a), "w"(b)
6066 : /* No clobbers */);
6067 return result;
6068 }
6069
6070 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6071 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6072 {
6073 uint16x8_t result;
6074 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6075 : "=w"(result)
6076 : "w"(a), "w"(b)
6077 : /* No clobbers */);
6078 return result;
6079 }
6080
6081 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6082 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6083 {
6084 uint32x4_t result;
6085 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6086 : "=w"(result)
6087 : "w"(a), "w"(b)
6088 : /* No clobbers */);
6089 return result;
6090 }
6091
6092 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6093 vld1_dup_f32 (const float32_t * a)
6094 {
6095 float32x2_t result;
6096 __asm__ ("ld1r {%0.2s}, %1"
6097 : "=w"(result)
6098 : "Utv"(*a)
6099 : /* No clobbers */);
6100 return result;
6101 }
6102
6103 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6104 vld1_dup_f64 (const float64_t * a)
6105 {
6106 float64x1_t result;
6107 __asm__ ("ld1r {%0.1d}, %1"
6108 : "=w"(result)
6109 : "Utv"(*a)
6110 : /* No clobbers */);
6111 return result;
6112 }
6113
6114 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6115 vld1_dup_p8 (const poly8_t * a)
6116 {
6117 poly8x8_t result;
6118 __asm__ ("ld1r {%0.8b}, %1"
6119 : "=w"(result)
6120 : "Utv"(*a)
6121 : /* No clobbers */);
6122 return result;
6123 }
6124
6125 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6126 vld1_dup_p16 (const poly16_t * a)
6127 {
6128 poly16x4_t result;
6129 __asm__ ("ld1r {%0.4h}, %1"
6130 : "=w"(result)
6131 : "Utv"(*a)
6132 : /* No clobbers */);
6133 return result;
6134 }
6135
6136 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6137 vld1_dup_s8 (const int8_t * a)
6138 {
6139 int8x8_t result;
6140 __asm__ ("ld1r {%0.8b}, %1"
6141 : "=w"(result)
6142 : "Utv"(*a)
6143 : /* No clobbers */);
6144 return result;
6145 }
6146
6147 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6148 vld1_dup_s16 (const int16_t * a)
6149 {
6150 int16x4_t result;
6151 __asm__ ("ld1r {%0.4h}, %1"
6152 : "=w"(result)
6153 : "Utv"(*a)
6154 : /* No clobbers */);
6155 return result;
6156 }
6157
6158 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6159 vld1_dup_s32 (const int32_t * a)
6160 {
6161 int32x2_t result;
6162 __asm__ ("ld1r {%0.2s}, %1"
6163 : "=w"(result)
6164 : "Utv"(*a)
6165 : /* No clobbers */);
6166 return result;
6167 }
6168
6169 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6170 vld1_dup_s64 (const int64_t * a)
6171 {
6172 int64x1_t result;
6173 __asm__ ("ld1r {%0.1d}, %1"
6174 : "=w"(result)
6175 : "Utv"(*a)
6176 : /* No clobbers */);
6177 return result;
6178 }
6179
6180 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6181 vld1_dup_u8 (const uint8_t * a)
6182 {
6183 uint8x8_t result;
6184 __asm__ ("ld1r {%0.8b}, %1"
6185 : "=w"(result)
6186 : "Utv"(*a)
6187 : /* No clobbers */);
6188 return result;
6189 }
6190
6191 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6192 vld1_dup_u16 (const uint16_t * a)
6193 {
6194 uint16x4_t result;
6195 __asm__ ("ld1r {%0.4h}, %1"
6196 : "=w"(result)
6197 : "Utv"(*a)
6198 : /* No clobbers */);
6199 return result;
6200 }
6201
6202 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6203 vld1_dup_u32 (const uint32_t * a)
6204 {
6205 uint32x2_t result;
6206 __asm__ ("ld1r {%0.2s}, %1"
6207 : "=w"(result)
6208 : "Utv"(*a)
6209 : /* No clobbers */);
6210 return result;
6211 }
6212
6213 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6214 vld1_dup_u64 (const uint64_t * a)
6215 {
6216 uint64x1_t result;
6217 __asm__ ("ld1r {%0.1d}, %1"
6218 : "=w"(result)
6219 : "Utv"(*a)
6220 : /* No clobbers */);
6221 return result;
6222 }
6223
6224 #define vld1_lane_f32(a, b, c) \
6225 __extension__ \
6226 ({ \
6227 float32x2_t b_ = (b); \
6228 const float32_t * a_ = (a); \
6229 float32x2_t result; \
6230 __asm__ ("ld1 {%0.s}[%1], %2" \
6231 : "=w"(result) \
6232 : "i" (c), "Utv"(*a_), "0"(b_) \
6233 : /* No clobbers */); \
6234 result; \
6235 })
6236
6237 #define vld1_lane_f64(a, b, c) \
6238 __extension__ \
6239 ({ \
6240 float64x1_t b_ = (b); \
6241 const float64_t * a_ = (a); \
6242 float64x1_t result; \
6243 __asm__ ("ld1 {%0.d}[%1], %2" \
6244 : "=w"(result) \
6245 : "i" (c), "Utv"(*a_), "0"(b_) \
6246 : /* No clobbers */); \
6247 result; \
6248 })
6249
6250 #define vld1_lane_p8(a, b, c) \
6251 __extension__ \
6252 ({ \
6253 poly8x8_t b_ = (b); \
6254 const poly8_t * a_ = (a); \
6255 poly8x8_t result; \
6256 __asm__ ("ld1 {%0.b}[%1], %2" \
6257 : "=w"(result) \
6258 : "i" (c), "Utv"(*a_), "0"(b_) \
6259 : /* No clobbers */); \
6260 result; \
6261 })
6262
6263 #define vld1_lane_p16(a, b, c) \
6264 __extension__ \
6265 ({ \
6266 poly16x4_t b_ = (b); \
6267 const poly16_t * a_ = (a); \
6268 poly16x4_t result; \
6269 __asm__ ("ld1 {%0.h}[%1], %2" \
6270 : "=w"(result) \
6271 : "i" (c), "Utv"(*a_), "0"(b_) \
6272 : /* No clobbers */); \
6273 result; \
6274 })
6275
6276 #define vld1_lane_s8(a, b, c) \
6277 __extension__ \
6278 ({ \
6279 int8x8_t b_ = (b); \
6280 const int8_t * a_ = (a); \
6281 int8x8_t result; \
6282 __asm__ ("ld1 {%0.b}[%1], %2" \
6283 : "=w"(result) \
6284 : "i" (c), "Utv"(*a_), "0"(b_) \
6285 : /* No clobbers */); \
6286 result; \
6287 })
6288
6289 #define vld1_lane_s16(a, b, c) \
6290 __extension__ \
6291 ({ \
6292 int16x4_t b_ = (b); \
6293 const int16_t * a_ = (a); \
6294 int16x4_t result; \
6295 __asm__ ("ld1 {%0.h}[%1], %2" \
6296 : "=w"(result) \
6297 : "i" (c), "Utv"(*a_), "0"(b_) \
6298 : /* No clobbers */); \
6299 result; \
6300 })
6301
6302 #define vld1_lane_s32(a, b, c) \
6303 __extension__ \
6304 ({ \
6305 int32x2_t b_ = (b); \
6306 const int32_t * a_ = (a); \
6307 int32x2_t result; \
6308 __asm__ ("ld1 {%0.s}[%1], %2" \
6309 : "=w"(result) \
6310 : "i" (c), "Utv"(*a_), "0"(b_) \
6311 : /* No clobbers */); \
6312 result; \
6313 })
6314
6315 #define vld1_lane_s64(a, b, c) \
6316 __extension__ \
6317 ({ \
6318 int64x1_t b_ = (b); \
6319 const int64_t * a_ = (a); \
6320 int64x1_t result; \
6321 __asm__ ("ld1 {%0.d}[%1], %2" \
6322 : "=w"(result) \
6323 : "i" (c), "Utv"(*a_), "0"(b_) \
6324 : /* No clobbers */); \
6325 result; \
6326 })
6327
6328 #define vld1_lane_u8(a, b, c) \
6329 __extension__ \
6330 ({ \
6331 uint8x8_t b_ = (b); \
6332 const uint8_t * a_ = (a); \
6333 uint8x8_t result; \
6334 __asm__ ("ld1 {%0.b}[%1], %2" \
6335 : "=w"(result) \
6336 : "i" (c), "Utv"(*a_), "0"(b_) \
6337 : /* No clobbers */); \
6338 result; \
6339 })
6340
6341 #define vld1_lane_u16(a, b, c) \
6342 __extension__ \
6343 ({ \
6344 uint16x4_t b_ = (b); \
6345 const uint16_t * a_ = (a); \
6346 uint16x4_t result; \
6347 __asm__ ("ld1 {%0.h}[%1], %2" \
6348 : "=w"(result) \
6349 : "i" (c), "Utv"(*a_), "0"(b_) \
6350 : /* No clobbers */); \
6351 result; \
6352 })
6353
6354 #define vld1_lane_u32(a, b, c) \
6355 __extension__ \
6356 ({ \
6357 uint32x2_t b_ = (b); \
6358 const uint32_t * a_ = (a); \
6359 uint32x2_t result; \
6360 __asm__ ("ld1 {%0.s}[%1], %2" \
6361 : "=w"(result) \
6362 : "i" (c), "Utv"(*a_), "0"(b_) \
6363 : /* No clobbers */); \
6364 result; \
6365 })
6366
6367 #define vld1_lane_u64(a, b, c) \
6368 __extension__ \
6369 ({ \
6370 uint64x1_t b_ = (b); \
6371 const uint64_t * a_ = (a); \
6372 uint64x1_t result; \
6373 __asm__ ("ld1 {%0.d}[%1], %2" \
6374 : "=w"(result) \
6375 : "i" (c), "Utv"(*a_), "0"(b_) \
6376 : /* No clobbers */); \
6377 result; \
6378 })
6379
6380 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6381 vld1q_dup_f32 (const float32_t * a)
6382 {
6383 float32x4_t result;
6384 __asm__ ("ld1r {%0.4s}, %1"
6385 : "=w"(result)
6386 : "Utv"(*a)
6387 : /* No clobbers */);
6388 return result;
6389 }
6390
6391 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6392 vld1q_dup_f64 (const float64_t * a)
6393 {
6394 float64x2_t result;
6395 __asm__ ("ld1r {%0.2d}, %1"
6396 : "=w"(result)
6397 : "Utv"(*a)
6398 : /* No clobbers */);
6399 return result;
6400 }
6401
6402 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6403 vld1q_dup_p8 (const poly8_t * a)
6404 {
6405 poly8x16_t result;
6406 __asm__ ("ld1r {%0.16b}, %1"
6407 : "=w"(result)
6408 : "Utv"(*a)
6409 : /* No clobbers */);
6410 return result;
6411 }
6412
6413 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6414 vld1q_dup_p16 (const poly16_t * a)
6415 {
6416 poly16x8_t result;
6417 __asm__ ("ld1r {%0.8h}, %1"
6418 : "=w"(result)
6419 : "Utv"(*a)
6420 : /* No clobbers */);
6421 return result;
6422 }
6423
6424 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6425 vld1q_dup_s8 (const int8_t * a)
6426 {
6427 int8x16_t result;
6428 __asm__ ("ld1r {%0.16b}, %1"
6429 : "=w"(result)
6430 : "Utv"(*a)
6431 : /* No clobbers */);
6432 return result;
6433 }
6434
6435 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6436 vld1q_dup_s16 (const int16_t * a)
6437 {
6438 int16x8_t result;
6439 __asm__ ("ld1r {%0.8h}, %1"
6440 : "=w"(result)
6441 : "Utv"(*a)
6442 : /* No clobbers */);
6443 return result;
6444 }
6445
6446 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6447 vld1q_dup_s32 (const int32_t * a)
6448 {
6449 int32x4_t result;
6450 __asm__ ("ld1r {%0.4s}, %1"
6451 : "=w"(result)
6452 : "Utv"(*a)
6453 : /* No clobbers */);
6454 return result;
6455 }
6456
6457 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6458 vld1q_dup_s64 (const int64_t * a)
6459 {
6460 int64x2_t result;
6461 __asm__ ("ld1r {%0.2d}, %1"
6462 : "=w"(result)
6463 : "Utv"(*a)
6464 : /* No clobbers */);
6465 return result;
6466 }
6467
6468 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6469 vld1q_dup_u8 (const uint8_t * a)
6470 {
6471 uint8x16_t result;
6472 __asm__ ("ld1r {%0.16b}, %1"
6473 : "=w"(result)
6474 : "Utv"(*a)
6475 : /* No clobbers */);
6476 return result;
6477 }
6478
6479 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6480 vld1q_dup_u16 (const uint16_t * a)
6481 {
6482 uint16x8_t result;
6483 __asm__ ("ld1r {%0.8h}, %1"
6484 : "=w"(result)
6485 : "Utv"(*a)
6486 : /* No clobbers */);
6487 return result;
6488 }
6489
6490 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6491 vld1q_dup_u32 (const uint32_t * a)
6492 {
6493 uint32x4_t result;
6494 __asm__ ("ld1r {%0.4s}, %1"
6495 : "=w"(result)
6496 : "Utv"(*a)
6497 : /* No clobbers */);
6498 return result;
6499 }
6500
6501 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6502 vld1q_dup_u64 (const uint64_t * a)
6503 {
6504 uint64x2_t result;
6505 __asm__ ("ld1r {%0.2d}, %1"
6506 : "=w"(result)
6507 : "Utv"(*a)
6508 : /* No clobbers */);
6509 return result;
6510 }
6511
6512 #define vld1q_lane_f32(a, b, c) \
6513 __extension__ \
6514 ({ \
6515 float32x4_t b_ = (b); \
6516 const float32_t * a_ = (a); \
6517 float32x4_t result; \
6518 __asm__ ("ld1 {%0.s}[%1], %2" \
6519 : "=w"(result) \
6520 : "i"(c), "Utv"(*a_), "0"(b_) \
6521 : /* No clobbers */); \
6522 result; \
6523 })
6524
6525 #define vld1q_lane_f64(a, b, c) \
6526 __extension__ \
6527 ({ \
6528 float64x2_t b_ = (b); \
6529 const float64_t * a_ = (a); \
6530 float64x2_t result; \
6531 __asm__ ("ld1 {%0.d}[%1], %2" \
6532 : "=w"(result) \
6533 : "i"(c), "Utv"(*a_), "0"(b_) \
6534 : /* No clobbers */); \
6535 result; \
6536 })
6537
6538 #define vld1q_lane_p8(a, b, c) \
6539 __extension__ \
6540 ({ \
6541 poly8x16_t b_ = (b); \
6542 const poly8_t * a_ = (a); \
6543 poly8x16_t result; \
6544 __asm__ ("ld1 {%0.b}[%1], %2" \
6545 : "=w"(result) \
6546 : "i"(c), "Utv"(*a_), "0"(b_) \
6547 : /* No clobbers */); \
6548 result; \
6549 })
6550
6551 #define vld1q_lane_p16(a, b, c) \
6552 __extension__ \
6553 ({ \
6554 poly16x8_t b_ = (b); \
6555 const poly16_t * a_ = (a); \
6556 poly16x8_t result; \
6557 __asm__ ("ld1 {%0.h}[%1], %2" \
6558 : "=w"(result) \
6559 : "i"(c), "Utv"(*a_), "0"(b_) \
6560 : /* No clobbers */); \
6561 result; \
6562 })
6563
6564 #define vld1q_lane_s8(a, b, c) \
6565 __extension__ \
6566 ({ \
6567 int8x16_t b_ = (b); \
6568 const int8_t * a_ = (a); \
6569 int8x16_t result; \
6570 __asm__ ("ld1 {%0.b}[%1], %2" \
6571 : "=w"(result) \
6572 : "i"(c), "Utv"(*a_), "0"(b_) \
6573 : /* No clobbers */); \
6574 result; \
6575 })
6576
6577 #define vld1q_lane_s16(a, b, c) \
6578 __extension__ \
6579 ({ \
6580 int16x8_t b_ = (b); \
6581 const int16_t * a_ = (a); \
6582 int16x8_t result; \
6583 __asm__ ("ld1 {%0.h}[%1], %2" \
6584 : "=w"(result) \
6585 : "i"(c), "Utv"(*a_), "0"(b_) \
6586 : /* No clobbers */); \
6587 result; \
6588 })
6589
6590 #define vld1q_lane_s32(a, b, c) \
6591 __extension__ \
6592 ({ \
6593 int32x4_t b_ = (b); \
6594 const int32_t * a_ = (a); \
6595 int32x4_t result; \
6596 __asm__ ("ld1 {%0.s}[%1], %2" \
6597 : "=w"(result) \
6598 : "i"(c), "Utv"(*a_), "0"(b_) \
6599 : /* No clobbers */); \
6600 result; \
6601 })
6602
6603 #define vld1q_lane_s64(a, b, c) \
6604 __extension__ \
6605 ({ \
6606 int64x2_t b_ = (b); \
6607 const int64_t * a_ = (a); \
6608 int64x2_t result; \
6609 __asm__ ("ld1 {%0.d}[%1], %2" \
6610 : "=w"(result) \
6611 : "i"(c), "Utv"(*a_), "0"(b_) \
6612 : /* No clobbers */); \
6613 result; \
6614 })
6615
6616 #define vld1q_lane_u8(a, b, c) \
6617 __extension__ \
6618 ({ \
6619 uint8x16_t b_ = (b); \
6620 const uint8_t * a_ = (a); \
6621 uint8x16_t result; \
6622 __asm__ ("ld1 {%0.b}[%1], %2" \
6623 : "=w"(result) \
6624 : "i"(c), "Utv"(*a_), "0"(b_) \
6625 : /* No clobbers */); \
6626 result; \
6627 })
6628
6629 #define vld1q_lane_u16(a, b, c) \
6630 __extension__ \
6631 ({ \
6632 uint16x8_t b_ = (b); \
6633 const uint16_t * a_ = (a); \
6634 uint16x8_t result; \
6635 __asm__ ("ld1 {%0.h}[%1], %2" \
6636 : "=w"(result) \
6637 : "i"(c), "Utv"(*a_), "0"(b_) \
6638 : /* No clobbers */); \
6639 result; \
6640 })
6641
6642 #define vld1q_lane_u32(a, b, c) \
6643 __extension__ \
6644 ({ \
6645 uint32x4_t b_ = (b); \
6646 const uint32_t * a_ = (a); \
6647 uint32x4_t result; \
6648 __asm__ ("ld1 {%0.s}[%1], %2" \
6649 : "=w"(result) \
6650 : "i"(c), "Utv"(*a_), "0"(b_) \
6651 : /* No clobbers */); \
6652 result; \
6653 })
6654
6655 #define vld1q_lane_u64(a, b, c) \
6656 __extension__ \
6657 ({ \
6658 uint64x2_t b_ = (b); \
6659 const uint64_t * a_ = (a); \
6660 uint64x2_t result; \
6661 __asm__ ("ld1 {%0.d}[%1], %2" \
6662 : "=w"(result) \
6663 : "i"(c), "Utv"(*a_), "0"(b_) \
6664 : /* No clobbers */); \
6665 result; \
6666 })
6667
6668 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6669 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6670 {
6671 float32x2_t result;
6672 float32x2_t t1;
6673 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
6674 : "=w"(result), "=w"(t1)
6675 : "0"(a), "w"(b), "w"(c)
6676 : /* No clobbers */);
6677 return result;
6678 }
6679
6680 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6681 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6682 {
6683 int16x4_t result;
6684 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6685 : "=w"(result)
6686 : "0"(a), "w"(b), "x"(c)
6687 : /* No clobbers */);
6688 return result;
6689 }
6690
6691 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6692 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6693 {
6694 int32x2_t result;
6695 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6696 : "=w"(result)
6697 : "0"(a), "w"(b), "w"(c)
6698 : /* No clobbers */);
6699 return result;
6700 }
6701
6702 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6703 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6704 {
6705 uint16x4_t result;
6706 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6707 : "=w"(result)
6708 : "0"(a), "w"(b), "x"(c)
6709 : /* No clobbers */);
6710 return result;
6711 }
6712
6713 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6714 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6715 {
6716 uint32x2_t result;
6717 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6718 : "=w"(result)
6719 : "0"(a), "w"(b), "w"(c)
6720 : /* No clobbers */);
6721 return result;
6722 }
6723
6724 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6725 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6726 {
6727 int8x8_t result;
6728 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6729 : "=w"(result)
6730 : "0"(a), "w"(b), "w"(c)
6731 : /* No clobbers */);
6732 return result;
6733 }
6734
6735 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6736 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6737 {
6738 int16x4_t result;
6739 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6740 : "=w"(result)
6741 : "0"(a), "w"(b), "w"(c)
6742 : /* No clobbers */);
6743 return result;
6744 }
6745
6746 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6747 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6748 {
6749 int32x2_t result;
6750 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6751 : "=w"(result)
6752 : "0"(a), "w"(b), "w"(c)
6753 : /* No clobbers */);
6754 return result;
6755 }
6756
6757 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6758 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6759 {
6760 uint8x8_t result;
6761 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6762 : "=w"(result)
6763 : "0"(a), "w"(b), "w"(c)
6764 : /* No clobbers */);
6765 return result;
6766 }
6767
6768 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6769 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6770 {
6771 uint16x4_t result;
6772 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6773 : "=w"(result)
6774 : "0"(a), "w"(b), "w"(c)
6775 : /* No clobbers */);
6776 return result;
6777 }
6778
6779 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6780 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6781 {
6782 uint32x2_t result;
6783 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6784 : "=w"(result)
6785 : "0"(a), "w"(b), "w"(c)
6786 : /* No clobbers */);
6787 return result;
6788 }
6789
6790 #define vmlal_high_lane_s16(a, b, c, d) \
6791 __extension__ \
6792 ({ \
6793 int16x8_t c_ = (c); \
6794 int16x8_t b_ = (b); \
6795 int32x4_t a_ = (a); \
6796 int32x4_t result; \
6797 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
6798 : "=w"(result) \
6799 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6800 : /* No clobbers */); \
6801 result; \
6802 })
6803
6804 #define vmlal_high_lane_s32(a, b, c, d) \
6805 __extension__ \
6806 ({ \
6807 int32x4_t c_ = (c); \
6808 int32x4_t b_ = (b); \
6809 int64x2_t a_ = (a); \
6810 int64x2_t result; \
6811 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
6812 : "=w"(result) \
6813 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6814 : /* No clobbers */); \
6815 result; \
6816 })
6817
6818 #define vmlal_high_lane_u16(a, b, c, d) \
6819 __extension__ \
6820 ({ \
6821 uint16x8_t c_ = (c); \
6822 uint16x8_t b_ = (b); \
6823 uint32x4_t a_ = (a); \
6824 uint32x4_t result; \
6825 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
6826 : "=w"(result) \
6827 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6828 : /* No clobbers */); \
6829 result; \
6830 })
6831
6832 #define vmlal_high_lane_u32(a, b, c, d) \
6833 __extension__ \
6834 ({ \
6835 uint32x4_t c_ = (c); \
6836 uint32x4_t b_ = (b); \
6837 uint64x2_t a_ = (a); \
6838 uint64x2_t result; \
6839 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
6840 : "=w"(result) \
6841 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6842 : /* No clobbers */); \
6843 result; \
6844 })
6845
6846 #define vmlal_high_laneq_s16(a, b, c, d) \
6847 __extension__ \
6848 ({ \
6849 int16x8_t c_ = (c); \
6850 int16x8_t b_ = (b); \
6851 int32x4_t a_ = (a); \
6852 int32x4_t result; \
6853 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
6854 : "=w"(result) \
6855 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6856 : /* No clobbers */); \
6857 result; \
6858 })
6859
6860 #define vmlal_high_laneq_s32(a, b, c, d) \
6861 __extension__ \
6862 ({ \
6863 int32x4_t c_ = (c); \
6864 int32x4_t b_ = (b); \
6865 int64x2_t a_ = (a); \
6866 int64x2_t result; \
6867 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
6868 : "=w"(result) \
6869 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6870 : /* No clobbers */); \
6871 result; \
6872 })
6873
6874 #define vmlal_high_laneq_u16(a, b, c, d) \
6875 __extension__ \
6876 ({ \
6877 uint16x8_t c_ = (c); \
6878 uint16x8_t b_ = (b); \
6879 uint32x4_t a_ = (a); \
6880 uint32x4_t result; \
6881 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
6882 : "=w"(result) \
6883 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6884 : /* No clobbers */); \
6885 result; \
6886 })
6887
6888 #define vmlal_high_laneq_u32(a, b, c, d) \
6889 __extension__ \
6890 ({ \
6891 uint32x4_t c_ = (c); \
6892 uint32x4_t b_ = (b); \
6893 uint64x2_t a_ = (a); \
6894 uint64x2_t result; \
6895 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
6896 : "=w"(result) \
6897 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6898 : /* No clobbers */); \
6899 result; \
6900 })
6901
6902 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6903 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6904 {
6905 int32x4_t result;
6906 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6907 : "=w"(result)
6908 : "0"(a), "w"(b), "x"(c)
6909 : /* No clobbers */);
6910 return result;
6911 }
6912
6913 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6914 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6915 {
6916 int64x2_t result;
6917 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6918 : "=w"(result)
6919 : "0"(a), "w"(b), "w"(c)
6920 : /* No clobbers */);
6921 return result;
6922 }
6923
6924 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6925 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6926 {
6927 uint32x4_t result;
6928 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6929 : "=w"(result)
6930 : "0"(a), "w"(b), "x"(c)
6931 : /* No clobbers */);
6932 return result;
6933 }
6934
6935 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6936 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6937 {
6938 uint64x2_t result;
6939 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6940 : "=w"(result)
6941 : "0"(a), "w"(b), "w"(c)
6942 : /* No clobbers */);
6943 return result;
6944 }
6945
6946 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6947 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6948 {
6949 int16x8_t result;
6950 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6951 : "=w"(result)
6952 : "0"(a), "w"(b), "w"(c)
6953 : /* No clobbers */);
6954 return result;
6955 }
6956
6957 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6958 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6959 {
6960 int32x4_t result;
6961 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6962 : "=w"(result)
6963 : "0"(a), "w"(b), "w"(c)
6964 : /* No clobbers */);
6965 return result;
6966 }
6967
6968 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6969 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6970 {
6971 int64x2_t result;
6972 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6973 : "=w"(result)
6974 : "0"(a), "w"(b), "w"(c)
6975 : /* No clobbers */);
6976 return result;
6977 }
6978
6979 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6980 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6981 {
6982 uint16x8_t result;
6983 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6984 : "=w"(result)
6985 : "0"(a), "w"(b), "w"(c)
6986 : /* No clobbers */);
6987 return result;
6988 }
6989
6990 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6991 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6992 {
6993 uint32x4_t result;
6994 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6995 : "=w"(result)
6996 : "0"(a), "w"(b), "w"(c)
6997 : /* No clobbers */);
6998 return result;
6999 }
7000
7001 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7002 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7003 {
7004 uint64x2_t result;
7005 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
7006 : "=w"(result)
7007 : "0"(a), "w"(b), "w"(c)
7008 : /* No clobbers */);
7009 return result;
7010 }
7011
7012 #define vmlal_lane_s16(a, b, c, d) \
7013 __extension__ \
7014 ({ \
7015 int16x4_t c_ = (c); \
7016 int16x4_t b_ = (b); \
7017 int32x4_t a_ = (a); \
7018 int32x4_t result; \
7019 __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
7020 : "=w"(result) \
7021 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7022 : /* No clobbers */); \
7023 result; \
7024 })
7025
7026 #define vmlal_lane_s32(a, b, c, d) \
7027 __extension__ \
7028 ({ \
7029 int32x2_t c_ = (c); \
7030 int32x2_t b_ = (b); \
7031 int64x2_t a_ = (a); \
7032 int64x2_t result; \
7033 __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \
7034 : "=w"(result) \
7035 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7036 : /* No clobbers */); \
7037 result; \
7038 })
7039
7040 #define vmlal_lane_u16(a, b, c, d) \
7041 __extension__ \
7042 ({ \
7043 uint16x4_t c_ = (c); \
7044 uint16x4_t b_ = (b); \
7045 uint32x4_t a_ = (a); \
7046 uint32x4_t result; \
7047 __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
7048 : "=w"(result) \
7049 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7050 : /* No clobbers */); \
7051 result; \
7052 })
7053
7054 #define vmlal_lane_u32(a, b, c, d) \
7055 __extension__ \
7056 ({ \
7057 uint32x2_t c_ = (c); \
7058 uint32x2_t b_ = (b); \
7059 uint64x2_t a_ = (a); \
7060 uint64x2_t result; \
7061 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
7062 : "=w"(result) \
7063 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7064 : /* No clobbers */); \
7065 result; \
7066 })
7067
7068 #define vmlal_laneq_s16(a, b, c, d) \
7069 __extension__ \
7070 ({ \
7071 int16x8_t c_ = (c); \
7072 int16x4_t b_ = (b); \
7073 int32x4_t a_ = (a); \
7074 int32x4_t result; \
7075 __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
7076 : "=w"(result) \
7077 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7078 : /* No clobbers */); \
7079 result; \
7080 })
7081
7082 #define vmlal_laneq_s32(a, b, c, d) \
7083 __extension__ \
7084 ({ \
7085 int32x4_t c_ = (c); \
7086 int32x2_t b_ = (b); \
7087 int64x2_t a_ = (a); \
7088 int64x2_t result; \
7089 __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \
7090 : "=w"(result) \
7091 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7092 : /* No clobbers */); \
7093 result; \
7094 })
7095
7096 #define vmlal_laneq_u16(a, b, c, d) \
7097 __extension__ \
7098 ({ \
7099 uint16x8_t c_ = (c); \
7100 uint16x4_t b_ = (b); \
7101 uint32x4_t a_ = (a); \
7102 uint32x4_t result; \
7103 __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
7104 : "=w"(result) \
7105 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7106 : /* No clobbers */); \
7107 result; \
7108 })
7109
7110 #define vmlal_laneq_u32(a, b, c, d) \
7111 __extension__ \
7112 ({ \
7113 uint32x4_t c_ = (c); \
7114 uint32x2_t b_ = (b); \
7115 uint64x2_t a_ = (a); \
7116 uint64x2_t result; \
7117 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
7118 : "=w"(result) \
7119 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7120 : /* No clobbers */); \
7121 result; \
7122 })
7123
7124 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7125 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7126 {
7127 int32x4_t result;
7128 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
7129 : "=w"(result)
7130 : "0"(a), "w"(b), "x"(c)
7131 : /* No clobbers */);
7132 return result;
7133 }
7134
7135 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7136 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7137 {
7138 int64x2_t result;
7139 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
7140 : "=w"(result)
7141 : "0"(a), "w"(b), "w"(c)
7142 : /* No clobbers */);
7143 return result;
7144 }
7145
7146 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7147 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7148 {
7149 uint32x4_t result;
7150 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
7151 : "=w"(result)
7152 : "0"(a), "w"(b), "x"(c)
7153 : /* No clobbers */);
7154 return result;
7155 }
7156
7157 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7158 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7159 {
7160 uint64x2_t result;
7161 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
7162 : "=w"(result)
7163 : "0"(a), "w"(b), "w"(c)
7164 : /* No clobbers */);
7165 return result;
7166 }
7167
7168 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7169 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7170 {
7171 int16x8_t result;
7172 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
7173 : "=w"(result)
7174 : "0"(a), "w"(b), "w"(c)
7175 : /* No clobbers */);
7176 return result;
7177 }
7178
7179 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7180 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7181 {
7182 int32x4_t result;
7183 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
7184 : "=w"(result)
7185 : "0"(a), "w"(b), "w"(c)
7186 : /* No clobbers */);
7187 return result;
7188 }
7189
7190 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7191 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7192 {
7193 int64x2_t result;
7194 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
7195 : "=w"(result)
7196 : "0"(a), "w"(b), "w"(c)
7197 : /* No clobbers */);
7198 return result;
7199 }
7200
7201 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7202 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7203 {
7204 uint16x8_t result;
7205 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
7206 : "=w"(result)
7207 : "0"(a), "w"(b), "w"(c)
7208 : /* No clobbers */);
7209 return result;
7210 }
7211
7212 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7213 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7214 {
7215 uint32x4_t result;
7216 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
7217 : "=w"(result)
7218 : "0"(a), "w"(b), "w"(c)
7219 : /* No clobbers */);
7220 return result;
7221 }
7222
7223 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7224 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7225 {
7226 uint64x2_t result;
7227 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
7228 : "=w"(result)
7229 : "0"(a), "w"(b), "w"(c)
7230 : /* No clobbers */);
7231 return result;
7232 }
7233
7234 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7235 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7236 {
7237 float32x4_t result;
7238 float32x4_t t1;
7239 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
7240 : "=w"(result), "=w"(t1)
7241 : "0"(a), "w"(b), "w"(c)
7242 : /* No clobbers */);
7243 return result;
7244 }
7245
7246 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7247 vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7248 {
7249 float64x2_t result;
7250 float64x2_t t1;
7251 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
7252 : "=w"(result), "=w"(t1)
7253 : "0"(a), "w"(b), "w"(c)
7254 : /* No clobbers */);
7255 return result;
7256 }
7257
7258 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7259 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7260 {
7261 int16x8_t result;
7262 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7263 : "=w"(result)
7264 : "0"(a), "w"(b), "x"(c)
7265 : /* No clobbers */);
7266 return result;
7267 }
7268
7269 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7270 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7271 {
7272 int32x4_t result;
7273 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7274 : "=w"(result)
7275 : "0"(a), "w"(b), "w"(c)
7276 : /* No clobbers */);
7277 return result;
7278 }
7279
7280 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7281 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7282 {
7283 uint16x8_t result;
7284 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7285 : "=w"(result)
7286 : "0"(a), "w"(b), "x"(c)
7287 : /* No clobbers */);
7288 return result;
7289 }
7290
7291 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7292 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7293 {
7294 uint32x4_t result;
7295 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7296 : "=w"(result)
7297 : "0"(a), "w"(b), "w"(c)
7298 : /* No clobbers */);
7299 return result;
7300 }
7301
7302 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7303 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7304 {
7305 int8x16_t result;
7306 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7307 : "=w"(result)
7308 : "0"(a), "w"(b), "w"(c)
7309 : /* No clobbers */);
7310 return result;
7311 }
7312
7313 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7314 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7315 {
7316 int16x8_t result;
7317 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7318 : "=w"(result)
7319 : "0"(a), "w"(b), "w"(c)
7320 : /* No clobbers */);
7321 return result;
7322 }
7323
7324 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7325 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7326 {
7327 int32x4_t result;
7328 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7329 : "=w"(result)
7330 : "0"(a), "w"(b), "w"(c)
7331 : /* No clobbers */);
7332 return result;
7333 }
7334
7335 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7336 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7337 {
7338 uint8x16_t result;
7339 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7340 : "=w"(result)
7341 : "0"(a), "w"(b), "w"(c)
7342 : /* No clobbers */);
7343 return result;
7344 }
7345
7346 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7347 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7348 {
7349 uint16x8_t result;
7350 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7351 : "=w"(result)
7352 : "0"(a), "w"(b), "w"(c)
7353 : /* No clobbers */);
7354 return result;
7355 }
7356
7357 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7358 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7359 {
7360 uint32x4_t result;
7361 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7362 : "=w"(result)
7363 : "0"(a), "w"(b), "w"(c)
7364 : /* No clobbers */);
7365 return result;
7366 }
7367
7368 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7369 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7370 {
7371 float32x2_t result;
7372 float32x2_t t1;
7373 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
7374 : "=w"(result), "=w"(t1)
7375 : "0"(a), "w"(b), "w"(c)
7376 : /* No clobbers */);
7377 return result;
7378 }
7379
7380 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7381 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7382 {
7383 int16x4_t result;
7384 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7385 : "=w"(result)
7386 : "0"(a), "w"(b), "x"(c)
7387 : /* No clobbers */);
7388 return result;
7389 }
7390
7391 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7392 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7393 {
7394 int32x2_t result;
7395 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7396 : "=w"(result)
7397 : "0"(a), "w"(b), "w"(c)
7398 : /* No clobbers */);
7399 return result;
7400 }
7401
7402 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7403 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7404 {
7405 uint16x4_t result;
7406 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7407 : "=w"(result)
7408 : "0"(a), "w"(b), "x"(c)
7409 : /* No clobbers */);
7410 return result;
7411 }
7412
7413 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7414 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7415 {
7416 uint32x2_t result;
7417 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7418 : "=w"(result)
7419 : "0"(a), "w"(b), "w"(c)
7420 : /* No clobbers */);
7421 return result;
7422 }
7423
7424 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7425 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7426 {
7427 int8x8_t result;
7428 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7429 : "=w"(result)
7430 : "0"(a), "w"(b), "w"(c)
7431 : /* No clobbers */);
7432 return result;
7433 }
7434
7435 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7436 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7437 {
7438 int16x4_t result;
7439 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7440 : "=w"(result)
7441 : "0"(a), "w"(b), "w"(c)
7442 : /* No clobbers */);
7443 return result;
7444 }
7445
7446 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7447 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7448 {
7449 int32x2_t result;
7450 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7451 : "=w"(result)
7452 : "0"(a), "w"(b), "w"(c)
7453 : /* No clobbers */);
7454 return result;
7455 }
7456
7457 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7458 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7459 {
7460 uint8x8_t result;
7461 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7462 : "=w"(result)
7463 : "0"(a), "w"(b), "w"(c)
7464 : /* No clobbers */);
7465 return result;
7466 }
7467
7468 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7469 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7470 {
7471 uint16x4_t result;
7472 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7473 : "=w"(result)
7474 : "0"(a), "w"(b), "w"(c)
7475 : /* No clobbers */);
7476 return result;
7477 }
7478
7479 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7480 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7481 {
7482 uint32x2_t result;
7483 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7484 : "=w"(result)
7485 : "0"(a), "w"(b), "w"(c)
7486 : /* No clobbers */);
7487 return result;
7488 }
7489
7490 #define vmlsl_high_lane_s16(a, b, c, d) \
7491 __extension__ \
7492 ({ \
7493 int16x8_t c_ = (c); \
7494 int16x8_t b_ = (b); \
7495 int32x4_t a_ = (a); \
7496 int32x4_t result; \
7497 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
7498 : "=w"(result) \
7499 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7500 : /* No clobbers */); \
7501 result; \
7502 })
7503
7504 #define vmlsl_high_lane_s32(a, b, c, d) \
7505 __extension__ \
7506 ({ \
7507 int32x4_t c_ = (c); \
7508 int32x4_t b_ = (b); \
7509 int64x2_t a_ = (a); \
7510 int64x2_t result; \
7511 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
7512 : "=w"(result) \
7513 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7514 : /* No clobbers */); \
7515 result; \
7516 })
7517
7518 #define vmlsl_high_lane_u16(a, b, c, d) \
7519 __extension__ \
7520 ({ \
7521 uint16x8_t c_ = (c); \
7522 uint16x8_t b_ = (b); \
7523 uint32x4_t a_ = (a); \
7524 uint32x4_t result; \
7525 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
7526 : "=w"(result) \
7527 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7528 : /* No clobbers */); \
7529 result; \
7530 })
7531
7532 #define vmlsl_high_lane_u32(a, b, c, d) \
7533 __extension__ \
7534 ({ \
7535 uint32x4_t c_ = (c); \
7536 uint32x4_t b_ = (b); \
7537 uint64x2_t a_ = (a); \
7538 uint64x2_t result; \
7539 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
7540 : "=w"(result) \
7541 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7542 : /* No clobbers */); \
7543 result; \
7544 })
7545
7546 #define vmlsl_high_laneq_s16(a, b, c, d) \
7547 __extension__ \
7548 ({ \
7549 int16x8_t c_ = (c); \
7550 int16x8_t b_ = (b); \
7551 int32x4_t a_ = (a); \
7552 int32x4_t result; \
7553 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
7554 : "=w"(result) \
7555 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7556 : /* No clobbers */); \
7557 result; \
7558 })
7559
7560 #define vmlsl_high_laneq_s32(a, b, c, d) \
7561 __extension__ \
7562 ({ \
7563 int32x4_t c_ = (c); \
7564 int32x4_t b_ = (b); \
7565 int64x2_t a_ = (a); \
7566 int64x2_t result; \
7567 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
7568 : "=w"(result) \
7569 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7570 : /* No clobbers */); \
7571 result; \
7572 })
7573
7574 #define vmlsl_high_laneq_u16(a, b, c, d) \
7575 __extension__ \
7576 ({ \
7577 uint16x8_t c_ = (c); \
7578 uint16x8_t b_ = (b); \
7579 uint32x4_t a_ = (a); \
7580 uint32x4_t result; \
7581 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
7582 : "=w"(result) \
7583 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7584 : /* No clobbers */); \
7585 result; \
7586 })
7587
7588 #define vmlsl_high_laneq_u32(a, b, c, d) \
7589 __extension__ \
7590 ({ \
7591 uint32x4_t c_ = (c); \
7592 uint32x4_t b_ = (b); \
7593 uint64x2_t a_ = (a); \
7594 uint64x2_t result; \
7595 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
7596 : "=w"(result) \
7597 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7598 : /* No clobbers */); \
7599 result; \
7600 })
7601
7602 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7603 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7604 {
7605 int32x4_t result;
7606 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
7607 : "=w"(result)
7608 : "0"(a), "w"(b), "x"(c)
7609 : /* No clobbers */);
7610 return result;
7611 }
7612
7613 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7614 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7615 {
7616 int64x2_t result;
7617 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
7618 : "=w"(result)
7619 : "0"(a), "w"(b), "w"(c)
7620 : /* No clobbers */);
7621 return result;
7622 }
7623
7624 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7625 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7626 {
7627 uint32x4_t result;
7628 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
7629 : "=w"(result)
7630 : "0"(a), "w"(b), "x"(c)
7631 : /* No clobbers */);
7632 return result;
7633 }
7634
7635 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7636 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7637 {
7638 uint64x2_t result;
7639 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
7640 : "=w"(result)
7641 : "0"(a), "w"(b), "w"(c)
7642 : /* No clobbers */);
7643 return result;
7644 }
7645
7646 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7647 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7648 {
7649 int16x8_t result;
7650 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
7651 : "=w"(result)
7652 : "0"(a), "w"(b), "w"(c)
7653 : /* No clobbers */);
7654 return result;
7655 }
7656
7657 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7658 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7659 {
7660 int32x4_t result;
7661 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
7662 : "=w"(result)
7663 : "0"(a), "w"(b), "w"(c)
7664 : /* No clobbers */);
7665 return result;
7666 }
7667
7668 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7669 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7670 {
7671 int64x2_t result;
7672 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
7673 : "=w"(result)
7674 : "0"(a), "w"(b), "w"(c)
7675 : /* No clobbers */);
7676 return result;
7677 }
7678
7679 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7680 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7681 {
7682 uint16x8_t result;
7683 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
7684 : "=w"(result)
7685 : "0"(a), "w"(b), "w"(c)
7686 : /* No clobbers */);
7687 return result;
7688 }
7689
7690 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7691 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7692 {
7693 uint32x4_t result;
7694 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
7695 : "=w"(result)
7696 : "0"(a), "w"(b), "w"(c)
7697 : /* No clobbers */);
7698 return result;
7699 }
7700
7701 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7702 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7703 {
7704 uint64x2_t result;
7705 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
7706 : "=w"(result)
7707 : "0"(a), "w"(b), "w"(c)
7708 : /* No clobbers */);
7709 return result;
7710 }
7711
7712 #define vmlsl_lane_s16(a, b, c, d) \
7713 __extension__ \
7714 ({ \
7715 int16x4_t c_ = (c); \
7716 int16x4_t b_ = (b); \
7717 int32x4_t a_ = (a); \
7718 int32x4_t result; \
7719 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
7720 : "=w"(result) \
7721 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7722 : /* No clobbers */); \
7723 result; \
7724 })
7725
7726 #define vmlsl_lane_s32(a, b, c, d) \
7727 __extension__ \
7728 ({ \
7729 int32x2_t c_ = (c); \
7730 int32x2_t b_ = (b); \
7731 int64x2_t a_ = (a); \
7732 int64x2_t result; \
7733 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
7734 : "=w"(result) \
7735 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7736 : /* No clobbers */); \
7737 result; \
7738 })
7739
7740 #define vmlsl_lane_u16(a, b, c, d) \
7741 __extension__ \
7742 ({ \
7743 uint16x4_t c_ = (c); \
7744 uint16x4_t b_ = (b); \
7745 uint32x4_t a_ = (a); \
7746 uint32x4_t result; \
7747 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
7748 : "=w"(result) \
7749 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7750 : /* No clobbers */); \
7751 result; \
7752 })
7753
7754 #define vmlsl_lane_u32(a, b, c, d) \
7755 __extension__ \
7756 ({ \
7757 uint32x2_t c_ = (c); \
7758 uint32x2_t b_ = (b); \
7759 uint64x2_t a_ = (a); \
7760 uint64x2_t result; \
7761 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
7762 : "=w"(result) \
7763 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7764 : /* No clobbers */); \
7765 result; \
7766 })
7767
7768 #define vmlsl_laneq_s16(a, b, c, d) \
7769 __extension__ \
7770 ({ \
7771 int16x8_t c_ = (c); \
7772 int16x4_t b_ = (b); \
7773 int32x4_t a_ = (a); \
7774 int32x4_t result; \
7775 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
7776 : "=w"(result) \
7777 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7778 : /* No clobbers */); \
7779 result; \
7780 })
7781
7782 #define vmlsl_laneq_s32(a, b, c, d) \
7783 __extension__ \
7784 ({ \
7785 int32x4_t c_ = (c); \
7786 int32x2_t b_ = (b); \
7787 int64x2_t a_ = (a); \
7788 int64x2_t result; \
7789 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
7790 : "=w"(result) \
7791 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7792 : /* No clobbers */); \
7793 result; \
7794 })
7795
7796 #define vmlsl_laneq_u16(a, b, c, d) \
7797 __extension__ \
7798 ({ \
7799 uint16x8_t c_ = (c); \
7800 uint16x4_t b_ = (b); \
7801 uint32x4_t a_ = (a); \
7802 uint32x4_t result; \
7803 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
7804 : "=w"(result) \
7805 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7806 : /* No clobbers */); \
7807 result; \
7808 })
7809
7810 #define vmlsl_laneq_u32(a, b, c, d) \
7811 __extension__ \
7812 ({ \
7813 uint32x4_t c_ = (c); \
7814 uint32x2_t b_ = (b); \
7815 uint64x2_t a_ = (a); \
7816 uint64x2_t result; \
7817 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
7818 : "=w"(result) \
7819 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7820 : /* No clobbers */); \
7821 result; \
7822 })
7823
7824 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7825 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7826 {
7827 int32x4_t result;
7828 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7829 : "=w"(result)
7830 : "0"(a), "w"(b), "x"(c)
7831 : /* No clobbers */);
7832 return result;
7833 }
7834
7835 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7836 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7837 {
7838 int64x2_t result;
7839 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7840 : "=w"(result)
7841 : "0"(a), "w"(b), "w"(c)
7842 : /* No clobbers */);
7843 return result;
7844 }
7845
7846 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7847 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7848 {
7849 uint32x4_t result;
7850 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7851 : "=w"(result)
7852 : "0"(a), "w"(b), "x"(c)
7853 : /* No clobbers */);
7854 return result;
7855 }
7856
7857 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7858 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7859 {
7860 uint64x2_t result;
7861 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7862 : "=w"(result)
7863 : "0"(a), "w"(b), "w"(c)
7864 : /* No clobbers */);
7865 return result;
7866 }
7867
7868 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7869 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7870 {
7871 int16x8_t result;
7872 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7873 : "=w"(result)
7874 : "0"(a), "w"(b), "w"(c)
7875 : /* No clobbers */);
7876 return result;
7877 }
7878
7879 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7880 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7881 {
7882 int32x4_t result;
7883 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7884 : "=w"(result)
7885 : "0"(a), "w"(b), "w"(c)
7886 : /* No clobbers */);
7887 return result;
7888 }
7889
7890 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7891 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7892 {
7893 int64x2_t result;
7894 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7895 : "=w"(result)
7896 : "0"(a), "w"(b), "w"(c)
7897 : /* No clobbers */);
7898 return result;
7899 }
7900
7901 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7902 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7903 {
7904 uint16x8_t result;
7905 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7906 : "=w"(result)
7907 : "0"(a), "w"(b), "w"(c)
7908 : /* No clobbers */);
7909 return result;
7910 }
7911
7912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7913 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7914 {
7915 uint32x4_t result;
7916 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7917 : "=w"(result)
7918 : "0"(a), "w"(b), "w"(c)
7919 : /* No clobbers */);
7920 return result;
7921 }
7922
7923 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7924 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7925 {
7926 uint64x2_t result;
7927 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7928 : "=w"(result)
7929 : "0"(a), "w"(b), "w"(c)
7930 : /* No clobbers */);
7931 return result;
7932 }
7933
7934 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7935 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7936 {
7937 float32x4_t result;
7938 float32x4_t t1;
7939 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7940 : "=w"(result), "=w"(t1)
7941 : "0"(a), "w"(b), "w"(c)
7942 : /* No clobbers */);
7943 return result;
7944 }
7945
7946 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7947 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7948 {
7949 float64x2_t result;
7950 float64x2_t t1;
7951 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
7952 : "=w"(result), "=w"(t1)
7953 : "0"(a), "w"(b), "x"(c)
7954 : /* No clobbers */);
7955 return result;
7956 }
7957
7958 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7959 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7960 {
7961 int16x8_t result;
7962 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7963 : "=w"(result)
7964 : "0"(a), "w"(b), "x"(c)
7965 : /* No clobbers */);
7966 return result;
7967 }
7968
7969 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7970 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7971 {
7972 int32x4_t result;
7973 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7974 : "=w"(result)
7975 : "0"(a), "w"(b), "w"(c)
7976 : /* No clobbers */);
7977 return result;
7978 }
7979
7980 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7981 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7982 {
7983 uint16x8_t result;
7984 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7985 : "=w"(result)
7986 : "0"(a), "w"(b), "x"(c)
7987 : /* No clobbers */);
7988 return result;
7989 }
7990
7991 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7992 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7993 {
7994 uint32x4_t result;
7995 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7996 : "=w"(result)
7997 : "0"(a), "w"(b), "w"(c)
7998 : /* No clobbers */);
7999 return result;
8000 }
8001
8002 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8003 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8004 {
8005 int8x16_t result;
8006 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8007 : "=w"(result)
8008 : "0"(a), "w"(b), "w"(c)
8009 : /* No clobbers */);
8010 return result;
8011 }
8012
8013 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8014 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8015 {
8016 int16x8_t result;
8017 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8018 : "=w"(result)
8019 : "0"(a), "w"(b), "w"(c)
8020 : /* No clobbers */);
8021 return result;
8022 }
8023
8024 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8025 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8026 {
8027 int32x4_t result;
8028 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8029 : "=w"(result)
8030 : "0"(a), "w"(b), "w"(c)
8031 : /* No clobbers */);
8032 return result;
8033 }
8034
8035 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8036 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8037 {
8038 uint8x16_t result;
8039 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8040 : "=w"(result)
8041 : "0"(a), "w"(b), "w"(c)
8042 : /* No clobbers */);
8043 return result;
8044 }
8045
8046 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8047 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8048 {
8049 uint16x8_t result;
8050 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8051 : "=w"(result)
8052 : "0"(a), "w"(b), "w"(c)
8053 : /* No clobbers */);
8054 return result;
8055 }
8056
8057 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8058 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8059 {
8060 uint32x4_t result;
8061 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8062 : "=w"(result)
8063 : "0"(a), "w"(b), "w"(c)
8064 : /* No clobbers */);
8065 return result;
8066 }
8067
8068 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8069 vmovl_high_s8 (int8x16_t a)
8070 {
8071 int16x8_t result;
8072 __asm__ ("sshll2 %0.8h,%1.16b,#0"
8073 : "=w"(result)
8074 : "w"(a)
8075 : /* No clobbers */);
8076 return result;
8077 }
8078
8079 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8080 vmovl_high_s16 (int16x8_t a)
8081 {
8082 int32x4_t result;
8083 __asm__ ("sshll2 %0.4s,%1.8h,#0"
8084 : "=w"(result)
8085 : "w"(a)
8086 : /* No clobbers */);
8087 return result;
8088 }
8089
8090 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8091 vmovl_high_s32 (int32x4_t a)
8092 {
8093 int64x2_t result;
8094 __asm__ ("sshll2 %0.2d,%1.4s,#0"
8095 : "=w"(result)
8096 : "w"(a)
8097 : /* No clobbers */);
8098 return result;
8099 }
8100
8101 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8102 vmovl_high_u8 (uint8x16_t a)
8103 {
8104 uint16x8_t result;
8105 __asm__ ("ushll2 %0.8h,%1.16b,#0"
8106 : "=w"(result)
8107 : "w"(a)
8108 : /* No clobbers */);
8109 return result;
8110 }
8111
8112 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8113 vmovl_high_u16 (uint16x8_t a)
8114 {
8115 uint32x4_t result;
8116 __asm__ ("ushll2 %0.4s,%1.8h,#0"
8117 : "=w"(result)
8118 : "w"(a)
8119 : /* No clobbers */);
8120 return result;
8121 }
8122
8123 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8124 vmovl_high_u32 (uint32x4_t a)
8125 {
8126 uint64x2_t result;
8127 __asm__ ("ushll2 %0.2d,%1.4s,#0"
8128 : "=w"(result)
8129 : "w"(a)
8130 : /* No clobbers */);
8131 return result;
8132 }
8133
8134 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8135 vmovl_s8 (int8x8_t a)
8136 {
8137 int16x8_t result;
8138 __asm__ ("sshll %0.8h,%1.8b,#0"
8139 : "=w"(result)
8140 : "w"(a)
8141 : /* No clobbers */);
8142 return result;
8143 }
8144
8145 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8146 vmovl_s16 (int16x4_t a)
8147 {
8148 int32x4_t result;
8149 __asm__ ("sshll %0.4s,%1.4h,#0"
8150 : "=w"(result)
8151 : "w"(a)
8152 : /* No clobbers */);
8153 return result;
8154 }
8155
8156 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8157 vmovl_s32 (int32x2_t a)
8158 {
8159 int64x2_t result;
8160 __asm__ ("sshll %0.2d,%1.2s,#0"
8161 : "=w"(result)
8162 : "w"(a)
8163 : /* No clobbers */);
8164 return result;
8165 }
8166
8167 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8168 vmovl_u8 (uint8x8_t a)
8169 {
8170 uint16x8_t result;
8171 __asm__ ("ushll %0.8h,%1.8b,#0"
8172 : "=w"(result)
8173 : "w"(a)
8174 : /* No clobbers */);
8175 return result;
8176 }
8177
8178 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8179 vmovl_u16 (uint16x4_t a)
8180 {
8181 uint32x4_t result;
8182 __asm__ ("ushll %0.4s,%1.4h,#0"
8183 : "=w"(result)
8184 : "w"(a)
8185 : /* No clobbers */);
8186 return result;
8187 }
8188
8189 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8190 vmovl_u32 (uint32x2_t a)
8191 {
8192 uint64x2_t result;
8193 __asm__ ("ushll %0.2d,%1.2s,#0"
8194 : "=w"(result)
8195 : "w"(a)
8196 : /* No clobbers */);
8197 return result;
8198 }
8199
8200 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8201 vmovn_high_s16 (int8x8_t a, int16x8_t b)
8202 {
8203 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
8204 __asm__ ("xtn2 %0.16b,%1.8h"
8205 : "+w"(result)
8206 : "w"(b)
8207 : /* No clobbers */);
8208 return result;
8209 }
8210
8211 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8212 vmovn_high_s32 (int16x4_t a, int32x4_t b)
8213 {
8214 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
8215 __asm__ ("xtn2 %0.8h,%1.4s"
8216 : "+w"(result)
8217 : "w"(b)
8218 : /* No clobbers */);
8219 return result;
8220 }
8221
8222 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8223 vmovn_high_s64 (int32x2_t a, int64x2_t b)
8224 {
8225 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
8226 __asm__ ("xtn2 %0.4s,%1.2d"
8227 : "+w"(result)
8228 : "w"(b)
8229 : /* No clobbers */);
8230 return result;
8231 }
8232
8233 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8234 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
8235 {
8236 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8237 __asm__ ("xtn2 %0.16b,%1.8h"
8238 : "+w"(result)
8239 : "w"(b)
8240 : /* No clobbers */);
8241 return result;
8242 }
8243
8244 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8245 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
8246 {
8247 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
8248 __asm__ ("xtn2 %0.8h,%1.4s"
8249 : "+w"(result)
8250 : "w"(b)
8251 : /* No clobbers */);
8252 return result;
8253 }
8254
8255 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8256 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
8257 {
8258 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
8259 __asm__ ("xtn2 %0.4s,%1.2d"
8260 : "+w"(result)
8261 : "w"(b)
8262 : /* No clobbers */);
8263 return result;
8264 }
8265
8266 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8267 vmovn_s16 (int16x8_t a)
8268 {
8269 int8x8_t result;
8270 __asm__ ("xtn %0.8b,%1.8h"
8271 : "=w"(result)
8272 : "w"(a)
8273 : /* No clobbers */);
8274 return result;
8275 }
8276
8277 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8278 vmovn_s32 (int32x4_t a)
8279 {
8280 int16x4_t result;
8281 __asm__ ("xtn %0.4h,%1.4s"
8282 : "=w"(result)
8283 : "w"(a)
8284 : /* No clobbers */);
8285 return result;
8286 }
8287
8288 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8289 vmovn_s64 (int64x2_t a)
8290 {
8291 int32x2_t result;
8292 __asm__ ("xtn %0.2s,%1.2d"
8293 : "=w"(result)
8294 : "w"(a)
8295 : /* No clobbers */);
8296 return result;
8297 }
8298
8299 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8300 vmovn_u16 (uint16x8_t a)
8301 {
8302 uint8x8_t result;
8303 __asm__ ("xtn %0.8b,%1.8h"
8304 : "=w"(result)
8305 : "w"(a)
8306 : /* No clobbers */);
8307 return result;
8308 }
8309
8310 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8311 vmovn_u32 (uint32x4_t a)
8312 {
8313 uint16x4_t result;
8314 __asm__ ("xtn %0.4h,%1.4s"
8315 : "=w"(result)
8316 : "w"(a)
8317 : /* No clobbers */);
8318 return result;
8319 }
8320
8321 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8322 vmovn_u64 (uint64x2_t a)
8323 {
8324 uint32x2_t result;
8325 __asm__ ("xtn %0.2s,%1.2d"
8326 : "=w"(result)
8327 : "w"(a)
8328 : /* No clobbers */);
8329 return result;
8330 }
8331
8332 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8333 vmul_n_f32 (float32x2_t a, float32_t b)
8334 {
8335 float32x2_t result;
8336 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
8337 : "=w"(result)
8338 : "w"(a), "w"(b)
8339 : /* No clobbers */);
8340 return result;
8341 }
8342
8343 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8344 vmul_n_s16 (int16x4_t a, int16_t b)
8345 {
8346 int16x4_t result;
8347 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8348 : "=w"(result)
8349 : "w"(a), "x"(b)
8350 : /* No clobbers */);
8351 return result;
8352 }
8353
8354 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8355 vmul_n_s32 (int32x2_t a, int32_t b)
8356 {
8357 int32x2_t result;
8358 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8359 : "=w"(result)
8360 : "w"(a), "w"(b)
8361 : /* No clobbers */);
8362 return result;
8363 }
8364
8365 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8366 vmul_n_u16 (uint16x4_t a, uint16_t b)
8367 {
8368 uint16x4_t result;
8369 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8370 : "=w"(result)
8371 : "w"(a), "x"(b)
8372 : /* No clobbers */);
8373 return result;
8374 }
8375
8376 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8377 vmul_n_u32 (uint32x2_t a, uint32_t b)
8378 {
8379 uint32x2_t result;
8380 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8381 : "=w"(result)
8382 : "w"(a), "w"(b)
8383 : /* No clobbers */);
8384 return result;
8385 }
8386
8387 #define vmuld_lane_f64(a, b, c) \
8388 __extension__ \
8389 ({ \
8390 float64x2_t b_ = (b); \
8391 float64_t a_ = (a); \
8392 float64_t result; \
8393 __asm__ ("fmul %d0,%d1,%2.d[%3]" \
8394 : "=w"(result) \
8395 : "w"(a_), "w"(b_), "i"(c) \
8396 : /* No clobbers */); \
8397 result; \
8398 })
8399
8400 #define vmull_high_lane_s16(a, b, c) \
8401 __extension__ \
8402 ({ \
8403 int16x8_t b_ = (b); \
8404 int16x8_t a_ = (a); \
8405 int32x4_t result; \
8406 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8407 : "=w"(result) \
8408 : "w"(a_), "x"(b_), "i"(c) \
8409 : /* No clobbers */); \
8410 result; \
8411 })
8412
8413 #define vmull_high_lane_s32(a, b, c) \
8414 __extension__ \
8415 ({ \
8416 int32x4_t b_ = (b); \
8417 int32x4_t a_ = (a); \
8418 int64x2_t result; \
8419 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8420 : "=w"(result) \
8421 : "w"(a_), "w"(b_), "i"(c) \
8422 : /* No clobbers */); \
8423 result; \
8424 })
8425
8426 #define vmull_high_lane_u16(a, b, c) \
8427 __extension__ \
8428 ({ \
8429 uint16x8_t b_ = (b); \
8430 uint16x8_t a_ = (a); \
8431 uint32x4_t result; \
8432 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8433 : "=w"(result) \
8434 : "w"(a_), "x"(b_), "i"(c) \
8435 : /* No clobbers */); \
8436 result; \
8437 })
8438
8439 #define vmull_high_lane_u32(a, b, c) \
8440 __extension__ \
8441 ({ \
8442 uint32x4_t b_ = (b); \
8443 uint32x4_t a_ = (a); \
8444 uint64x2_t result; \
8445 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8446 : "=w"(result) \
8447 : "w"(a_), "w"(b_), "i"(c) \
8448 : /* No clobbers */); \
8449 result; \
8450 })
8451
8452 #define vmull_high_laneq_s16(a, b, c) \
8453 __extension__ \
8454 ({ \
8455 int16x8_t b_ = (b); \
8456 int16x8_t a_ = (a); \
8457 int32x4_t result; \
8458 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8459 : "=w"(result) \
8460 : "w"(a_), "x"(b_), "i"(c) \
8461 : /* No clobbers */); \
8462 result; \
8463 })
8464
8465 #define vmull_high_laneq_s32(a, b, c) \
8466 __extension__ \
8467 ({ \
8468 int32x4_t b_ = (b); \
8469 int32x4_t a_ = (a); \
8470 int64x2_t result; \
8471 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8472 : "=w"(result) \
8473 : "w"(a_), "w"(b_), "i"(c) \
8474 : /* No clobbers */); \
8475 result; \
8476 })
8477
8478 #define vmull_high_laneq_u16(a, b, c) \
8479 __extension__ \
8480 ({ \
8481 uint16x8_t b_ = (b); \
8482 uint16x8_t a_ = (a); \
8483 uint32x4_t result; \
8484 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8485 : "=w"(result) \
8486 : "w"(a_), "x"(b_), "i"(c) \
8487 : /* No clobbers */); \
8488 result; \
8489 })
8490
8491 #define vmull_high_laneq_u32(a, b, c) \
8492 __extension__ \
8493 ({ \
8494 uint32x4_t b_ = (b); \
8495 uint32x4_t a_ = (a); \
8496 uint64x2_t result; \
8497 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8498 : "=w"(result) \
8499 : "w"(a_), "w"(b_), "i"(c) \
8500 : /* No clobbers */); \
8501 result; \
8502 })
8503
8504 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8505 vmull_high_n_s16 (int16x8_t a, int16_t b)
8506 {
8507 int32x4_t result;
8508 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
8509 : "=w"(result)
8510 : "w"(a), "x"(b)
8511 : /* No clobbers */);
8512 return result;
8513 }
8514
8515 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8516 vmull_high_n_s32 (int32x4_t a, int32_t b)
8517 {
8518 int64x2_t result;
8519 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
8520 : "=w"(result)
8521 : "w"(a), "w"(b)
8522 : /* No clobbers */);
8523 return result;
8524 }
8525
8526 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8527 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
8528 {
8529 uint32x4_t result;
8530 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
8531 : "=w"(result)
8532 : "w"(a), "x"(b)
8533 : /* No clobbers */);
8534 return result;
8535 }
8536
8537 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8538 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
8539 {
8540 uint64x2_t result;
8541 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
8542 : "=w"(result)
8543 : "w"(a), "w"(b)
8544 : /* No clobbers */);
8545 return result;
8546 }
8547
8548 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8549 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
8550 {
8551 poly16x8_t result;
8552 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
8553 : "=w"(result)
8554 : "w"(a), "w"(b)
8555 : /* No clobbers */);
8556 return result;
8557 }
8558
8559 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8560 vmull_high_s8 (int8x16_t a, int8x16_t b)
8561 {
8562 int16x8_t result;
8563 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
8564 : "=w"(result)
8565 : "w"(a), "w"(b)
8566 : /* No clobbers */);
8567 return result;
8568 }
8569
8570 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8571 vmull_high_s16 (int16x8_t a, int16x8_t b)
8572 {
8573 int32x4_t result;
8574 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
8575 : "=w"(result)
8576 : "w"(a), "w"(b)
8577 : /* No clobbers */);
8578 return result;
8579 }
8580
8581 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8582 vmull_high_s32 (int32x4_t a, int32x4_t b)
8583 {
8584 int64x2_t result;
8585 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
8586 : "=w"(result)
8587 : "w"(a), "w"(b)
8588 : /* No clobbers */);
8589 return result;
8590 }
8591
8592 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8593 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
8594 {
8595 uint16x8_t result;
8596 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
8597 : "=w"(result)
8598 : "w"(a), "w"(b)
8599 : /* No clobbers */);
8600 return result;
8601 }
8602
8603 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8604 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
8605 {
8606 uint32x4_t result;
8607 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
8608 : "=w"(result)
8609 : "w"(a), "w"(b)
8610 : /* No clobbers */);
8611 return result;
8612 }
8613
8614 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8615 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
8616 {
8617 uint64x2_t result;
8618 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
8619 : "=w"(result)
8620 : "w"(a), "w"(b)
8621 : /* No clobbers */);
8622 return result;
8623 }
8624
8625 #define vmull_lane_s16(a, b, c) \
8626 __extension__ \
8627 ({ \
8628 int16x4_t b_ = (b); \
8629 int16x4_t a_ = (a); \
8630 int32x4_t result; \
8631 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
8632 : "=w"(result) \
8633 : "w"(a_), "x"(b_), "i"(c) \
8634 : /* No clobbers */); \
8635 result; \
8636 })
8637
8638 #define vmull_lane_s32(a, b, c) \
8639 __extension__ \
8640 ({ \
8641 int32x2_t b_ = (b); \
8642 int32x2_t a_ = (a); \
8643 int64x2_t result; \
8644 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
8645 : "=w"(result) \
8646 : "w"(a_), "w"(b_), "i"(c) \
8647 : /* No clobbers */); \
8648 result; \
8649 })
8650
8651 #define vmull_lane_u16(a, b, c) \
8652 __extension__ \
8653 ({ \
8654 uint16x4_t b_ = (b); \
8655 uint16x4_t a_ = (a); \
8656 uint32x4_t result; \
8657 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
8658 : "=w"(result) \
8659 : "w"(a_), "x"(b_), "i"(c) \
8660 : /* No clobbers */); \
8661 result; \
8662 })
8663
8664 #define vmull_lane_u32(a, b, c) \
8665 __extension__ \
8666 ({ \
8667 uint32x2_t b_ = (b); \
8668 uint32x2_t a_ = (a); \
8669 uint64x2_t result; \
8670 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
8671 : "=w"(result) \
8672 : "w"(a_), "w"(b_), "i"(c) \
8673 : /* No clobbers */); \
8674 result; \
8675 })
8676
8677 #define vmull_laneq_s16(a, b, c) \
8678 __extension__ \
8679 ({ \
8680 int16x8_t b_ = (b); \
8681 int16x4_t a_ = (a); \
8682 int32x4_t result; \
8683 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
8684 : "=w"(result) \
8685 : "w"(a_), "x"(b_), "i"(c) \
8686 : /* No clobbers */); \
8687 result; \
8688 })
8689
8690 #define vmull_laneq_s32(a, b, c) \
8691 __extension__ \
8692 ({ \
8693 int32x4_t b_ = (b); \
8694 int32x2_t a_ = (a); \
8695 int64x2_t result; \
8696 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
8697 : "=w"(result) \
8698 : "w"(a_), "w"(b_), "i"(c) \
8699 : /* No clobbers */); \
8700 result; \
8701 })
8702
8703 #define vmull_laneq_u16(a, b, c) \
8704 __extension__ \
8705 ({ \
8706 uint16x8_t b_ = (b); \
8707 uint16x4_t a_ = (a); \
8708 uint32x4_t result; \
8709 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
8710 : "=w"(result) \
8711 : "w"(a_), "x"(b_), "i"(c) \
8712 : /* No clobbers */); \
8713 result; \
8714 })
8715
8716 #define vmull_laneq_u32(a, b, c) \
8717 __extension__ \
8718 ({ \
8719 uint32x4_t b_ = (b); \
8720 uint32x2_t a_ = (a); \
8721 uint64x2_t result; \
8722 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
8723 : "=w"(result) \
8724 : "w"(a_), "w"(b_), "i"(c) \
8725 : /* No clobbers */); \
8726 result; \
8727 })
8728
8729 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8730 vmull_n_s16 (int16x4_t a, int16_t b)
8731 {
8732 int32x4_t result;
8733 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
8734 : "=w"(result)
8735 : "w"(a), "x"(b)
8736 : /* No clobbers */);
8737 return result;
8738 }
8739
8740 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8741 vmull_n_s32 (int32x2_t a, int32_t b)
8742 {
8743 int64x2_t result;
8744 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
8745 : "=w"(result)
8746 : "w"(a), "w"(b)
8747 : /* No clobbers */);
8748 return result;
8749 }
8750
8751 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8752 vmull_n_u16 (uint16x4_t a, uint16_t b)
8753 {
8754 uint32x4_t result;
8755 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
8756 : "=w"(result)
8757 : "w"(a), "x"(b)
8758 : /* No clobbers */);
8759 return result;
8760 }
8761
8762 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8763 vmull_n_u32 (uint32x2_t a, uint32_t b)
8764 {
8765 uint64x2_t result;
8766 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
8767 : "=w"(result)
8768 : "w"(a), "w"(b)
8769 : /* No clobbers */);
8770 return result;
8771 }
8772
8773 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8774 vmull_p8 (poly8x8_t a, poly8x8_t b)
8775 {
8776 poly16x8_t result;
8777 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
8778 : "=w"(result)
8779 : "w"(a), "w"(b)
8780 : /* No clobbers */);
8781 return result;
8782 }
8783
8784 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8785 vmull_s8 (int8x8_t a, int8x8_t b)
8786 {
8787 int16x8_t result;
8788 __asm__ ("smull %0.8h, %1.8b, %2.8b"
8789 : "=w"(result)
8790 : "w"(a), "w"(b)
8791 : /* No clobbers */);
8792 return result;
8793 }
8794
8795 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8796 vmull_s16 (int16x4_t a, int16x4_t b)
8797 {
8798 int32x4_t result;
8799 __asm__ ("smull %0.4s, %1.4h, %2.4h"
8800 : "=w"(result)
8801 : "w"(a), "w"(b)
8802 : /* No clobbers */);
8803 return result;
8804 }
8805
8806 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8807 vmull_s32 (int32x2_t a, int32x2_t b)
8808 {
8809 int64x2_t result;
8810 __asm__ ("smull %0.2d, %1.2s, %2.2s"
8811 : "=w"(result)
8812 : "w"(a), "w"(b)
8813 : /* No clobbers */);
8814 return result;
8815 }
8816
8817 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8818 vmull_u8 (uint8x8_t a, uint8x8_t b)
8819 {
8820 uint16x8_t result;
8821 __asm__ ("umull %0.8h, %1.8b, %2.8b"
8822 : "=w"(result)
8823 : "w"(a), "w"(b)
8824 : /* No clobbers */);
8825 return result;
8826 }
8827
8828 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8829 vmull_u16 (uint16x4_t a, uint16x4_t b)
8830 {
8831 uint32x4_t result;
8832 __asm__ ("umull %0.4s, %1.4h, %2.4h"
8833 : "=w"(result)
8834 : "w"(a), "w"(b)
8835 : /* No clobbers */);
8836 return result;
8837 }
8838
8839 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8840 vmull_u32 (uint32x2_t a, uint32x2_t b)
8841 {
8842 uint64x2_t result;
8843 __asm__ ("umull %0.2d, %1.2s, %2.2s"
8844 : "=w"(result)
8845 : "w"(a), "w"(b)
8846 : /* No clobbers */);
8847 return result;
8848 }
8849
8850 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8851 vmulq_n_f32 (float32x4_t a, float32_t b)
8852 {
8853 float32x4_t result;
8854 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
8855 : "=w"(result)
8856 : "w"(a), "w"(b)
8857 : /* No clobbers */);
8858 return result;
8859 }
8860
8861 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8862 vmulq_n_f64 (float64x2_t a, float64_t b)
8863 {
8864 float64x2_t result;
8865 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
8866 : "=w"(result)
8867 : "w"(a), "w"(b)
8868 : /* No clobbers */);
8869 return result;
8870 }
8871
8872 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8873 vmulq_n_s16 (int16x8_t a, int16_t b)
8874 {
8875 int16x8_t result;
8876 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8877 : "=w"(result)
8878 : "w"(a), "x"(b)
8879 : /* No clobbers */);
8880 return result;
8881 }
8882
8883 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8884 vmulq_n_s32 (int32x4_t a, int32_t b)
8885 {
8886 int32x4_t result;
8887 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8888 : "=w"(result)
8889 : "w"(a), "w"(b)
8890 : /* No clobbers */);
8891 return result;
8892 }
8893
8894 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8895 vmulq_n_u16 (uint16x8_t a, uint16_t b)
8896 {
8897 uint16x8_t result;
8898 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8899 : "=w"(result)
8900 : "w"(a), "x"(b)
8901 : /* No clobbers */);
8902 return result;
8903 }
8904
8905 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8906 vmulq_n_u32 (uint32x4_t a, uint32_t b)
8907 {
8908 uint32x4_t result;
8909 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8910 : "=w"(result)
8911 : "w"(a), "w"(b)
8912 : /* No clobbers */);
8913 return result;
8914 }
8915
8916 #define vmuls_lane_f32(a, b, c) \
8917 __extension__ \
8918 ({ \
8919 float32x4_t b_ = (b); \
8920 float32_t a_ = (a); \
8921 float32_t result; \
8922 __asm__ ("fmul %s0,%s1,%2.s[%3]" \
8923 : "=w"(result) \
8924 : "w"(a_), "w"(b_), "i"(c) \
8925 : /* No clobbers */); \
8926 result; \
8927 })
8928
8929 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8930 vmulx_f32 (float32x2_t a, float32x2_t b)
8931 {
8932 float32x2_t result;
8933 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
8934 : "=w"(result)
8935 : "w"(a), "w"(b)
8936 : /* No clobbers */);
8937 return result;
8938 }
8939
8940 #define vmulx_lane_f32(a, b, c) \
8941 __extension__ \
8942 ({ \
8943 float32x4_t b_ = (b); \
8944 float32x2_t a_ = (a); \
8945 float32x2_t result; \
8946 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
8947 : "=w"(result) \
8948 : "w"(a_), "w"(b_), "i"(c) \
8949 : /* No clobbers */); \
8950 result; \
8951 })
8952
8953 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
8954 vmulxd_f64 (float64_t a, float64_t b)
8955 {
8956 float64_t result;
8957 __asm__ ("fmulx %d0, %d1, %d2"
8958 : "=w"(result)
8959 : "w"(a), "w"(b)
8960 : /* No clobbers */);
8961 return result;
8962 }
8963
8964 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8965 vmulxq_f32 (float32x4_t a, float32x4_t b)
8966 {
8967 float32x4_t result;
8968 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
8969 : "=w"(result)
8970 : "w"(a), "w"(b)
8971 : /* No clobbers */);
8972 return result;
8973 }
8974
8975 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8976 vmulxq_f64 (float64x2_t a, float64x2_t b)
8977 {
8978 float64x2_t result;
8979 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
8980 : "=w"(result)
8981 : "w"(a), "w"(b)
8982 : /* No clobbers */);
8983 return result;
8984 }
8985
8986 #define vmulxq_lane_f32(a, b, c) \
8987 __extension__ \
8988 ({ \
8989 float32x4_t b_ = (b); \
8990 float32x4_t a_ = (a); \
8991 float32x4_t result; \
8992 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
8993 : "=w"(result) \
8994 : "w"(a_), "w"(b_), "i"(c) \
8995 : /* No clobbers */); \
8996 result; \
8997 })
8998
8999 #define vmulxq_lane_f64(a, b, c) \
9000 __extension__ \
9001 ({ \
9002 float64x2_t b_ = (b); \
9003 float64x2_t a_ = (a); \
9004 float64x2_t result; \
9005 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
9006 : "=w"(result) \
9007 : "w"(a_), "w"(b_), "i"(c) \
9008 : /* No clobbers */); \
9009 result; \
9010 })
9011
9012 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9013 vmulxs_f32 (float32_t a, float32_t b)
9014 {
9015 float32_t result;
9016 __asm__ ("fmulx %s0, %s1, %s2"
9017 : "=w"(result)
9018 : "w"(a), "w"(b)
9019 : /* No clobbers */);
9020 return result;
9021 }
9022
9023 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9024 vmvn_p8 (poly8x8_t a)
9025 {
9026 poly8x8_t result;
9027 __asm__ ("mvn %0.8b,%1.8b"
9028 : "=w"(result)
9029 : "w"(a)
9030 : /* No clobbers */);
9031 return result;
9032 }
9033
9034 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9035 vmvn_s8 (int8x8_t a)
9036 {
9037 int8x8_t result;
9038 __asm__ ("mvn %0.8b,%1.8b"
9039 : "=w"(result)
9040 : "w"(a)
9041 : /* No clobbers */);
9042 return result;
9043 }
9044
9045 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9046 vmvn_s16 (int16x4_t a)
9047 {
9048 int16x4_t result;
9049 __asm__ ("mvn %0.8b,%1.8b"
9050 : "=w"(result)
9051 : "w"(a)
9052 : /* No clobbers */);
9053 return result;
9054 }
9055
9056 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9057 vmvn_s32 (int32x2_t a)
9058 {
9059 int32x2_t result;
9060 __asm__ ("mvn %0.8b,%1.8b"
9061 : "=w"(result)
9062 : "w"(a)
9063 : /* No clobbers */);
9064 return result;
9065 }
9066
9067 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9068 vmvn_u8 (uint8x8_t a)
9069 {
9070 uint8x8_t result;
9071 __asm__ ("mvn %0.8b,%1.8b"
9072 : "=w"(result)
9073 : "w"(a)
9074 : /* No clobbers */);
9075 return result;
9076 }
9077
9078 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9079 vmvn_u16 (uint16x4_t a)
9080 {
9081 uint16x4_t result;
9082 __asm__ ("mvn %0.8b,%1.8b"
9083 : "=w"(result)
9084 : "w"(a)
9085 : /* No clobbers */);
9086 return result;
9087 }
9088
9089 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9090 vmvn_u32 (uint32x2_t a)
9091 {
9092 uint32x2_t result;
9093 __asm__ ("mvn %0.8b,%1.8b"
9094 : "=w"(result)
9095 : "w"(a)
9096 : /* No clobbers */);
9097 return result;
9098 }
9099
9100 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9101 vmvnq_p8 (poly8x16_t a)
9102 {
9103 poly8x16_t result;
9104 __asm__ ("mvn %0.16b,%1.16b"
9105 : "=w"(result)
9106 : "w"(a)
9107 : /* No clobbers */);
9108 return result;
9109 }
9110
9111 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9112 vmvnq_s8 (int8x16_t a)
9113 {
9114 int8x16_t result;
9115 __asm__ ("mvn %0.16b,%1.16b"
9116 : "=w"(result)
9117 : "w"(a)
9118 : /* No clobbers */);
9119 return result;
9120 }
9121
9122 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9123 vmvnq_s16 (int16x8_t a)
9124 {
9125 int16x8_t result;
9126 __asm__ ("mvn %0.16b,%1.16b"
9127 : "=w"(result)
9128 : "w"(a)
9129 : /* No clobbers */);
9130 return result;
9131 }
9132
9133 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9134 vmvnq_s32 (int32x4_t a)
9135 {
9136 int32x4_t result;
9137 __asm__ ("mvn %0.16b,%1.16b"
9138 : "=w"(result)
9139 : "w"(a)
9140 : /* No clobbers */);
9141 return result;
9142 }
9143
9144 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9145 vmvnq_u8 (uint8x16_t a)
9146 {
9147 uint8x16_t result;
9148 __asm__ ("mvn %0.16b,%1.16b"
9149 : "=w"(result)
9150 : "w"(a)
9151 : /* No clobbers */);
9152 return result;
9153 }
9154
9155 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9156 vmvnq_u16 (uint16x8_t a)
9157 {
9158 uint16x8_t result;
9159 __asm__ ("mvn %0.16b,%1.16b"
9160 : "=w"(result)
9161 : "w"(a)
9162 : /* No clobbers */);
9163 return result;
9164 }
9165
9166 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9167 vmvnq_u32 (uint32x4_t a)
9168 {
9169 uint32x4_t result;
9170 __asm__ ("mvn %0.16b,%1.16b"
9171 : "=w"(result)
9172 : "w"(a)
9173 : /* No clobbers */);
9174 return result;
9175 }
9176
9177
9178 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9179 vpadal_s8 (int16x4_t a, int8x8_t b)
9180 {
9181 int16x4_t result;
9182 __asm__ ("sadalp %0.4h,%2.8b"
9183 : "=w"(result)
9184 : "0"(a), "w"(b)
9185 : /* No clobbers */);
9186 return result;
9187 }
9188
9189 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9190 vpadal_s16 (int32x2_t a, int16x4_t b)
9191 {
9192 int32x2_t result;
9193 __asm__ ("sadalp %0.2s,%2.4h"
9194 : "=w"(result)
9195 : "0"(a), "w"(b)
9196 : /* No clobbers */);
9197 return result;
9198 }
9199
9200 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9201 vpadal_s32 (int64x1_t a, int32x2_t b)
9202 {
9203 int64x1_t result;
9204 __asm__ ("sadalp %0.1d,%2.2s"
9205 : "=w"(result)
9206 : "0"(a), "w"(b)
9207 : /* No clobbers */);
9208 return result;
9209 }
9210
9211 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9212 vpadal_u8 (uint16x4_t a, uint8x8_t b)
9213 {
9214 uint16x4_t result;
9215 __asm__ ("uadalp %0.4h,%2.8b"
9216 : "=w"(result)
9217 : "0"(a), "w"(b)
9218 : /* No clobbers */);
9219 return result;
9220 }
9221
9222 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9223 vpadal_u16 (uint32x2_t a, uint16x4_t b)
9224 {
9225 uint32x2_t result;
9226 __asm__ ("uadalp %0.2s,%2.4h"
9227 : "=w"(result)
9228 : "0"(a), "w"(b)
9229 : /* No clobbers */);
9230 return result;
9231 }
9232
9233 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9234 vpadal_u32 (uint64x1_t a, uint32x2_t b)
9235 {
9236 uint64x1_t result;
9237 __asm__ ("uadalp %0.1d,%2.2s"
9238 : "=w"(result)
9239 : "0"(a), "w"(b)
9240 : /* No clobbers */);
9241 return result;
9242 }
9243
9244 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9245 vpadalq_s8 (int16x8_t a, int8x16_t b)
9246 {
9247 int16x8_t result;
9248 __asm__ ("sadalp %0.8h,%2.16b"
9249 : "=w"(result)
9250 : "0"(a), "w"(b)
9251 : /* No clobbers */);
9252 return result;
9253 }
9254
9255 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9256 vpadalq_s16 (int32x4_t a, int16x8_t b)
9257 {
9258 int32x4_t result;
9259 __asm__ ("sadalp %0.4s,%2.8h"
9260 : "=w"(result)
9261 : "0"(a), "w"(b)
9262 : /* No clobbers */);
9263 return result;
9264 }
9265
9266 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9267 vpadalq_s32 (int64x2_t a, int32x4_t b)
9268 {
9269 int64x2_t result;
9270 __asm__ ("sadalp %0.2d,%2.4s"
9271 : "=w"(result)
9272 : "0"(a), "w"(b)
9273 : /* No clobbers */);
9274 return result;
9275 }
9276
9277 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9278 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
9279 {
9280 uint16x8_t result;
9281 __asm__ ("uadalp %0.8h,%2.16b"
9282 : "=w"(result)
9283 : "0"(a), "w"(b)
9284 : /* No clobbers */);
9285 return result;
9286 }
9287
9288 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9289 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
9290 {
9291 uint32x4_t result;
9292 __asm__ ("uadalp %0.4s,%2.8h"
9293 : "=w"(result)
9294 : "0"(a), "w"(b)
9295 : /* No clobbers */);
9296 return result;
9297 }
9298
9299 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9300 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
9301 {
9302 uint64x2_t result;
9303 __asm__ ("uadalp %0.2d,%2.4s"
9304 : "=w"(result)
9305 : "0"(a), "w"(b)
9306 : /* No clobbers */);
9307 return result;
9308 }
9309
9310 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9311 vpadd_f32 (float32x2_t a, float32x2_t b)
9312 {
9313 float32x2_t result;
9314 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
9315 : "=w"(result)
9316 : "w"(a), "w"(b)
9317 : /* No clobbers */);
9318 return result;
9319 }
9320
9321 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9322 vpadd_s8 (int8x8_t __a, int8x8_t __b)
9323 {
9324 return __builtin_aarch64_addpv8qi (__a, __b);
9325 }
9326
9327 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9328 vpadd_s16 (int16x4_t __a, int16x4_t __b)
9329 {
9330 return __builtin_aarch64_addpv4hi (__a, __b);
9331 }
9332
9333 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9334 vpadd_s32 (int32x2_t __a, int32x2_t __b)
9335 {
9336 return __builtin_aarch64_addpv2si (__a, __b);
9337 }
9338
9339 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9340 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
9341 {
9342 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
9343 (int8x8_t) __b);
9344 }
9345
9346 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9347 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
9348 {
9349 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
9350 (int16x4_t) __b);
9351 }
9352
9353 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9354 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
9355 {
9356 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
9357 (int32x2_t) __b);
9358 }
9359
9360 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9361 vpaddd_f64 (float64x2_t a)
9362 {
9363 float64_t result;
9364 __asm__ ("faddp %d0,%1.2d"
9365 : "=w"(result)
9366 : "w"(a)
9367 : /* No clobbers */);
9368 return result;
9369 }
9370
9371 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9372 vpaddl_s8 (int8x8_t a)
9373 {
9374 int16x4_t result;
9375 __asm__ ("saddlp %0.4h,%1.8b"
9376 : "=w"(result)
9377 : "w"(a)
9378 : /* No clobbers */);
9379 return result;
9380 }
9381
9382 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9383 vpaddl_s16 (int16x4_t a)
9384 {
9385 int32x2_t result;
9386 __asm__ ("saddlp %0.2s,%1.4h"
9387 : "=w"(result)
9388 : "w"(a)
9389 : /* No clobbers */);
9390 return result;
9391 }
9392
9393 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9394 vpaddl_s32 (int32x2_t a)
9395 {
9396 int64x1_t result;
9397 __asm__ ("saddlp %0.1d,%1.2s"
9398 : "=w"(result)
9399 : "w"(a)
9400 : /* No clobbers */);
9401 return result;
9402 }
9403
9404 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9405 vpaddl_u8 (uint8x8_t a)
9406 {
9407 uint16x4_t result;
9408 __asm__ ("uaddlp %0.4h,%1.8b"
9409 : "=w"(result)
9410 : "w"(a)
9411 : /* No clobbers */);
9412 return result;
9413 }
9414
9415 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9416 vpaddl_u16 (uint16x4_t a)
9417 {
9418 uint32x2_t result;
9419 __asm__ ("uaddlp %0.2s,%1.4h"
9420 : "=w"(result)
9421 : "w"(a)
9422 : /* No clobbers */);
9423 return result;
9424 }
9425
9426 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9427 vpaddl_u32 (uint32x2_t a)
9428 {
9429 uint64x1_t result;
9430 __asm__ ("uaddlp %0.1d,%1.2s"
9431 : "=w"(result)
9432 : "w"(a)
9433 : /* No clobbers */);
9434 return result;
9435 }
9436
9437 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9438 vpaddlq_s8 (int8x16_t a)
9439 {
9440 int16x8_t result;
9441 __asm__ ("saddlp %0.8h,%1.16b"
9442 : "=w"(result)
9443 : "w"(a)
9444 : /* No clobbers */);
9445 return result;
9446 }
9447
9448 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9449 vpaddlq_s16 (int16x8_t a)
9450 {
9451 int32x4_t result;
9452 __asm__ ("saddlp %0.4s,%1.8h"
9453 : "=w"(result)
9454 : "w"(a)
9455 : /* No clobbers */);
9456 return result;
9457 }
9458
9459 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9460 vpaddlq_s32 (int32x4_t a)
9461 {
9462 int64x2_t result;
9463 __asm__ ("saddlp %0.2d,%1.4s"
9464 : "=w"(result)
9465 : "w"(a)
9466 : /* No clobbers */);
9467 return result;
9468 }
9469
9470 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9471 vpaddlq_u8 (uint8x16_t a)
9472 {
9473 uint16x8_t result;
9474 __asm__ ("uaddlp %0.8h,%1.16b"
9475 : "=w"(result)
9476 : "w"(a)
9477 : /* No clobbers */);
9478 return result;
9479 }
9480
9481 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9482 vpaddlq_u16 (uint16x8_t a)
9483 {
9484 uint32x4_t result;
9485 __asm__ ("uaddlp %0.4s,%1.8h"
9486 : "=w"(result)
9487 : "w"(a)
9488 : /* No clobbers */);
9489 return result;
9490 }
9491
9492 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9493 vpaddlq_u32 (uint32x4_t a)
9494 {
9495 uint64x2_t result;
9496 __asm__ ("uaddlp %0.2d,%1.4s"
9497 : "=w"(result)
9498 : "w"(a)
9499 : /* No clobbers */);
9500 return result;
9501 }
9502
9503 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9504 vpaddq_f32 (float32x4_t a, float32x4_t b)
9505 {
9506 float32x4_t result;
9507 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
9508 : "=w"(result)
9509 : "w"(a), "w"(b)
9510 : /* No clobbers */);
9511 return result;
9512 }
9513
9514 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9515 vpaddq_f64 (float64x2_t a, float64x2_t b)
9516 {
9517 float64x2_t result;
9518 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
9519 : "=w"(result)
9520 : "w"(a), "w"(b)
9521 : /* No clobbers */);
9522 return result;
9523 }
9524
9525 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9526 vpaddq_s8 (int8x16_t a, int8x16_t b)
9527 {
9528 int8x16_t result;
9529 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9530 : "=w"(result)
9531 : "w"(a), "w"(b)
9532 : /* No clobbers */);
9533 return result;
9534 }
9535
9536 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9537 vpaddq_s16 (int16x8_t a, int16x8_t b)
9538 {
9539 int16x8_t result;
9540 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9541 : "=w"(result)
9542 : "w"(a), "w"(b)
9543 : /* No clobbers */);
9544 return result;
9545 }
9546
9547 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9548 vpaddq_s32 (int32x4_t a, int32x4_t b)
9549 {
9550 int32x4_t result;
9551 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9552 : "=w"(result)
9553 : "w"(a), "w"(b)
9554 : /* No clobbers */);
9555 return result;
9556 }
9557
9558 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9559 vpaddq_s64 (int64x2_t a, int64x2_t b)
9560 {
9561 int64x2_t result;
9562 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9563 : "=w"(result)
9564 : "w"(a), "w"(b)
9565 : /* No clobbers */);
9566 return result;
9567 }
9568
9569 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9570 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
9571 {
9572 uint8x16_t result;
9573 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9574 : "=w"(result)
9575 : "w"(a), "w"(b)
9576 : /* No clobbers */);
9577 return result;
9578 }
9579
9580 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9581 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
9582 {
9583 uint16x8_t result;
9584 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9585 : "=w"(result)
9586 : "w"(a), "w"(b)
9587 : /* No clobbers */);
9588 return result;
9589 }
9590
9591 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9592 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
9593 {
9594 uint32x4_t result;
9595 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9596 : "=w"(result)
9597 : "w"(a), "w"(b)
9598 : /* No clobbers */);
9599 return result;
9600 }
9601
9602 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9603 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
9604 {
9605 uint64x2_t result;
9606 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9607 : "=w"(result)
9608 : "w"(a), "w"(b)
9609 : /* No clobbers */);
9610 return result;
9611 }
9612
9613 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9614 vpadds_f32 (float32x2_t a)
9615 {
9616 float32_t result;
9617 __asm__ ("faddp %s0,%1.2s"
9618 : "=w"(result)
9619 : "w"(a)
9620 : /* No clobbers */);
9621 return result;
9622 }
9623
9624 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9625 vpmax_f32 (float32x2_t a, float32x2_t b)
9626 {
9627 float32x2_t result;
9628 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
9629 : "=w"(result)
9630 : "w"(a), "w"(b)
9631 : /* No clobbers */);
9632 return result;
9633 }
9634
9635 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9636 vpmax_s8 (int8x8_t a, int8x8_t b)
9637 {
9638 int8x8_t result;
9639 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
9640 : "=w"(result)
9641 : "w"(a), "w"(b)
9642 : /* No clobbers */);
9643 return result;
9644 }
9645
9646 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9647 vpmax_s16 (int16x4_t a, int16x4_t b)
9648 {
9649 int16x4_t result;
9650 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
9651 : "=w"(result)
9652 : "w"(a), "w"(b)
9653 : /* No clobbers */);
9654 return result;
9655 }
9656
9657 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9658 vpmax_s32 (int32x2_t a, int32x2_t b)
9659 {
9660 int32x2_t result;
9661 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
9662 : "=w"(result)
9663 : "w"(a), "w"(b)
9664 : /* No clobbers */);
9665 return result;
9666 }
9667
9668 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9669 vpmax_u8 (uint8x8_t a, uint8x8_t b)
9670 {
9671 uint8x8_t result;
9672 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
9673 : "=w"(result)
9674 : "w"(a), "w"(b)
9675 : /* No clobbers */);
9676 return result;
9677 }
9678
9679 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9680 vpmax_u16 (uint16x4_t a, uint16x4_t b)
9681 {
9682 uint16x4_t result;
9683 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
9684 : "=w"(result)
9685 : "w"(a), "w"(b)
9686 : /* No clobbers */);
9687 return result;
9688 }
9689
9690 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9691 vpmax_u32 (uint32x2_t a, uint32x2_t b)
9692 {
9693 uint32x2_t result;
9694 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
9695 : "=w"(result)
9696 : "w"(a), "w"(b)
9697 : /* No clobbers */);
9698 return result;
9699 }
9700
9701 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9702 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
9703 {
9704 float32x2_t result;
9705 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
9706 : "=w"(result)
9707 : "w"(a), "w"(b)
9708 : /* No clobbers */);
9709 return result;
9710 }
9711
9712 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9713 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
9714 {
9715 float32x4_t result;
9716 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
9717 : "=w"(result)
9718 : "w"(a), "w"(b)
9719 : /* No clobbers */);
9720 return result;
9721 }
9722
9723 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9724 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
9725 {
9726 float64x2_t result;
9727 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
9728 : "=w"(result)
9729 : "w"(a), "w"(b)
9730 : /* No clobbers */);
9731 return result;
9732 }
9733
9734 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9735 vpmaxnmqd_f64 (float64x2_t a)
9736 {
9737 float64_t result;
9738 __asm__ ("fmaxnmp %d0,%1.2d"
9739 : "=w"(result)
9740 : "w"(a)
9741 : /* No clobbers */);
9742 return result;
9743 }
9744
9745 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9746 vpmaxnms_f32 (float32x2_t a)
9747 {
9748 float32_t result;
9749 __asm__ ("fmaxnmp %s0,%1.2s"
9750 : "=w"(result)
9751 : "w"(a)
9752 : /* No clobbers */);
9753 return result;
9754 }
9755
9756 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9757 vpmaxq_f32 (float32x4_t a, float32x4_t b)
9758 {
9759 float32x4_t result;
9760 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
9761 : "=w"(result)
9762 : "w"(a), "w"(b)
9763 : /* No clobbers */);
9764 return result;
9765 }
9766
9767 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9768 vpmaxq_f64 (float64x2_t a, float64x2_t b)
9769 {
9770 float64x2_t result;
9771 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
9772 : "=w"(result)
9773 : "w"(a), "w"(b)
9774 : /* No clobbers */);
9775 return result;
9776 }
9777
9778 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9779 vpmaxq_s8 (int8x16_t a, int8x16_t b)
9780 {
9781 int8x16_t result;
9782 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
9783 : "=w"(result)
9784 : "w"(a), "w"(b)
9785 : /* No clobbers */);
9786 return result;
9787 }
9788
9789 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9790 vpmaxq_s16 (int16x8_t a, int16x8_t b)
9791 {
9792 int16x8_t result;
9793 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
9794 : "=w"(result)
9795 : "w"(a), "w"(b)
9796 : /* No clobbers */);
9797 return result;
9798 }
9799
9800 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9801 vpmaxq_s32 (int32x4_t a, int32x4_t b)
9802 {
9803 int32x4_t result;
9804 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
9805 : "=w"(result)
9806 : "w"(a), "w"(b)
9807 : /* No clobbers */);
9808 return result;
9809 }
9810
9811 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9812 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
9813 {
9814 uint8x16_t result;
9815 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
9816 : "=w"(result)
9817 : "w"(a), "w"(b)
9818 : /* No clobbers */);
9819 return result;
9820 }
9821
9822 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9823 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
9824 {
9825 uint16x8_t result;
9826 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
9827 : "=w"(result)
9828 : "w"(a), "w"(b)
9829 : /* No clobbers */);
9830 return result;
9831 }
9832
9833 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9834 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
9835 {
9836 uint32x4_t result;
9837 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
9838 : "=w"(result)
9839 : "w"(a), "w"(b)
9840 : /* No clobbers */);
9841 return result;
9842 }
9843
9844 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9845 vpmaxqd_f64 (float64x2_t a)
9846 {
9847 float64_t result;
9848 __asm__ ("fmaxp %d0,%1.2d"
9849 : "=w"(result)
9850 : "w"(a)
9851 : /* No clobbers */);
9852 return result;
9853 }
9854
9855 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9856 vpmaxs_f32 (float32x2_t a)
9857 {
9858 float32_t result;
9859 __asm__ ("fmaxp %s0,%1.2s"
9860 : "=w"(result)
9861 : "w"(a)
9862 : /* No clobbers */);
9863 return result;
9864 }
9865
9866 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9867 vpmin_f32 (float32x2_t a, float32x2_t b)
9868 {
9869 float32x2_t result;
9870 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
9871 : "=w"(result)
9872 : "w"(a), "w"(b)
9873 : /* No clobbers */);
9874 return result;
9875 }
9876
9877 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9878 vpmin_s8 (int8x8_t a, int8x8_t b)
9879 {
9880 int8x8_t result;
9881 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
9882 : "=w"(result)
9883 : "w"(a), "w"(b)
9884 : /* No clobbers */);
9885 return result;
9886 }
9887
9888 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9889 vpmin_s16 (int16x4_t a, int16x4_t b)
9890 {
9891 int16x4_t result;
9892 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
9893 : "=w"(result)
9894 : "w"(a), "w"(b)
9895 : /* No clobbers */);
9896 return result;
9897 }
9898
9899 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9900 vpmin_s32 (int32x2_t a, int32x2_t b)
9901 {
9902 int32x2_t result;
9903 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
9904 : "=w"(result)
9905 : "w"(a), "w"(b)
9906 : /* No clobbers */);
9907 return result;
9908 }
9909
9910 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9911 vpmin_u8 (uint8x8_t a, uint8x8_t b)
9912 {
9913 uint8x8_t result;
9914 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
9915 : "=w"(result)
9916 : "w"(a), "w"(b)
9917 : /* No clobbers */);
9918 return result;
9919 }
9920
9921 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9922 vpmin_u16 (uint16x4_t a, uint16x4_t b)
9923 {
9924 uint16x4_t result;
9925 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
9926 : "=w"(result)
9927 : "w"(a), "w"(b)
9928 : /* No clobbers */);
9929 return result;
9930 }
9931
9932 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9933 vpmin_u32 (uint32x2_t a, uint32x2_t b)
9934 {
9935 uint32x2_t result;
9936 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
9937 : "=w"(result)
9938 : "w"(a), "w"(b)
9939 : /* No clobbers */);
9940 return result;
9941 }
9942
9943 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9944 vpminnm_f32 (float32x2_t a, float32x2_t b)
9945 {
9946 float32x2_t result;
9947 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
9948 : "=w"(result)
9949 : "w"(a), "w"(b)
9950 : /* No clobbers */);
9951 return result;
9952 }
9953
9954 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9955 vpminnmq_f32 (float32x4_t a, float32x4_t b)
9956 {
9957 float32x4_t result;
9958 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
9959 : "=w"(result)
9960 : "w"(a), "w"(b)
9961 : /* No clobbers */);
9962 return result;
9963 }
9964
9965 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9966 vpminnmq_f64 (float64x2_t a, float64x2_t b)
9967 {
9968 float64x2_t result;
9969 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
9970 : "=w"(result)
9971 : "w"(a), "w"(b)
9972 : /* No clobbers */);
9973 return result;
9974 }
9975
9976 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9977 vpminnmqd_f64 (float64x2_t a)
9978 {
9979 float64_t result;
9980 __asm__ ("fminnmp %d0,%1.2d"
9981 : "=w"(result)
9982 : "w"(a)
9983 : /* No clobbers */);
9984 return result;
9985 }
9986
9987 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9988 vpminnms_f32 (float32x2_t a)
9989 {
9990 float32_t result;
9991 __asm__ ("fminnmp %s0,%1.2s"
9992 : "=w"(result)
9993 : "w"(a)
9994 : /* No clobbers */);
9995 return result;
9996 }
9997
9998 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9999 vpminq_f32 (float32x4_t a, float32x4_t b)
10000 {
10001 float32x4_t result;
10002 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
10003 : "=w"(result)
10004 : "w"(a), "w"(b)
10005 : /* No clobbers */);
10006 return result;
10007 }
10008
10009 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10010 vpminq_f64 (float64x2_t a, float64x2_t b)
10011 {
10012 float64x2_t result;
10013 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
10014 : "=w"(result)
10015 : "w"(a), "w"(b)
10016 : /* No clobbers */);
10017 return result;
10018 }
10019
10020 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10021 vpminq_s8 (int8x16_t a, int8x16_t b)
10022 {
10023 int8x16_t result;
10024 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
10025 : "=w"(result)
10026 : "w"(a), "w"(b)
10027 : /* No clobbers */);
10028 return result;
10029 }
10030
10031 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10032 vpminq_s16 (int16x8_t a, int16x8_t b)
10033 {
10034 int16x8_t result;
10035 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
10036 : "=w"(result)
10037 : "w"(a), "w"(b)
10038 : /* No clobbers */);
10039 return result;
10040 }
10041
10042 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10043 vpminq_s32 (int32x4_t a, int32x4_t b)
10044 {
10045 int32x4_t result;
10046 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
10047 : "=w"(result)
10048 : "w"(a), "w"(b)
10049 : /* No clobbers */);
10050 return result;
10051 }
10052
10053 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10054 vpminq_u8 (uint8x16_t a, uint8x16_t b)
10055 {
10056 uint8x16_t result;
10057 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
10058 : "=w"(result)
10059 : "w"(a), "w"(b)
10060 : /* No clobbers */);
10061 return result;
10062 }
10063
10064 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10065 vpminq_u16 (uint16x8_t a, uint16x8_t b)
10066 {
10067 uint16x8_t result;
10068 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
10069 : "=w"(result)
10070 : "w"(a), "w"(b)
10071 : /* No clobbers */);
10072 return result;
10073 }
10074
10075 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10076 vpminq_u32 (uint32x4_t a, uint32x4_t b)
10077 {
10078 uint32x4_t result;
10079 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
10080 : "=w"(result)
10081 : "w"(a), "w"(b)
10082 : /* No clobbers */);
10083 return result;
10084 }
10085
10086 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10087 vpminqd_f64 (float64x2_t a)
10088 {
10089 float64_t result;
10090 __asm__ ("fminp %d0,%1.2d"
10091 : "=w"(result)
10092 : "w"(a)
10093 : /* No clobbers */);
10094 return result;
10095 }
10096
10097 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10098 vpmins_f32 (float32x2_t a)
10099 {
10100 float32_t result;
10101 __asm__ ("fminp %s0,%1.2s"
10102 : "=w"(result)
10103 : "w"(a)
10104 : /* No clobbers */);
10105 return result;
10106 }
10107
10108 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10109 vqdmulh_n_s16 (int16x4_t a, int16_t b)
10110 {
10111 int16x4_t result;
10112 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
10113 : "=w"(result)
10114 : "w"(a), "w"(b)
10115 : /* No clobbers */);
10116 return result;
10117 }
10118
10119 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10120 vqdmulh_n_s32 (int32x2_t a, int32_t b)
10121 {
10122 int32x2_t result;
10123 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
10124 : "=w"(result)
10125 : "w"(a), "w"(b)
10126 : /* No clobbers */);
10127 return result;
10128 }
10129
10130 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10131 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
10132 {
10133 int16x8_t result;
10134 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
10135 : "=w"(result)
10136 : "w"(a), "w"(b)
10137 : /* No clobbers */);
10138 return result;
10139 }
10140
10141 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10142 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
10143 {
10144 int32x4_t result;
10145 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
10146 : "=w"(result)
10147 : "w"(a), "w"(b)
10148 : /* No clobbers */);
10149 return result;
10150 }
10151
10152 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10153 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
10154 {
10155 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10156 __asm__ ("sqxtn2 %0.16b, %1.8h"
10157 : "+w"(result)
10158 : "w"(b)
10159 : /* No clobbers */);
10160 return result;
10161 }
10162
10163 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10164 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
10165 {
10166 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
10167 __asm__ ("sqxtn2 %0.8h, %1.4s"
10168 : "+w"(result)
10169 : "w"(b)
10170 : /* No clobbers */);
10171 return result;
10172 }
10173
10174 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10175 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
10176 {
10177 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
10178 __asm__ ("sqxtn2 %0.4s, %1.2d"
10179 : "+w"(result)
10180 : "w"(b)
10181 : /* No clobbers */);
10182 return result;
10183 }
10184
10185 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10186 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
10187 {
10188 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10189 __asm__ ("uqxtn2 %0.16b, %1.8h"
10190 : "+w"(result)
10191 : "w"(b)
10192 : /* No clobbers */);
10193 return result;
10194 }
10195
10196 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10197 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
10198 {
10199 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10200 __asm__ ("uqxtn2 %0.8h, %1.4s"
10201 : "+w"(result)
10202 : "w"(b)
10203 : /* No clobbers */);
10204 return result;
10205 }
10206
10207 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10208 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
10209 {
10210 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10211 __asm__ ("uqxtn2 %0.4s, %1.2d"
10212 : "+w"(result)
10213 : "w"(b)
10214 : /* No clobbers */);
10215 return result;
10216 }
10217
10218 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10219 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
10220 {
10221 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10222 __asm__ ("sqxtun2 %0.16b, %1.8h"
10223 : "+w"(result)
10224 : "w"(b)
10225 : /* No clobbers */);
10226 return result;
10227 }
10228
10229 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10230 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
10231 {
10232 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10233 __asm__ ("sqxtun2 %0.8h, %1.4s"
10234 : "+w"(result)
10235 : "w"(b)
10236 : /* No clobbers */);
10237 return result;
10238 }
10239
10240 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10241 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
10242 {
10243 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10244 __asm__ ("sqxtun2 %0.4s, %1.2d"
10245 : "+w"(result)
10246 : "w"(b)
10247 : /* No clobbers */);
10248 return result;
10249 }
10250
10251 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10252 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
10253 {
10254 int16x4_t result;
10255 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
10256 : "=w"(result)
10257 : "w"(a), "x"(b)
10258 : /* No clobbers */);
10259 return result;
10260 }
10261
10262 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10263 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
10264 {
10265 int32x2_t result;
10266 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
10267 : "=w"(result)
10268 : "w"(a), "w"(b)
10269 : /* No clobbers */);
10270 return result;
10271 }
10272
10273 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10274 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
10275 {
10276 int16x8_t result;
10277 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
10278 : "=w"(result)
10279 : "w"(a), "x"(b)
10280 : /* No clobbers */);
10281 return result;
10282 }
10283
10284 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10285 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
10286 {
10287 int32x4_t result;
10288 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
10289 : "=w"(result)
10290 : "w"(a), "w"(b)
10291 : /* No clobbers */);
10292 return result;
10293 }
10294
10295 #define vqrshrn_high_n_s16(a, b, c) \
10296 __extension__ \
10297 ({ \
10298 int16x8_t b_ = (b); \
10299 int8x8_t a_ = (a); \
10300 int8x16_t result = vcombine_s8 \
10301 (a_, vcreate_s8 \
10302 (__AARCH64_UINT64_C (0x0))); \
10303 __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
10304 : "+w"(result) \
10305 : "w"(b_), "i"(c) \
10306 : /* No clobbers */); \
10307 result; \
10308 })
10309
10310 #define vqrshrn_high_n_s32(a, b, c) \
10311 __extension__ \
10312 ({ \
10313 int32x4_t b_ = (b); \
10314 int16x4_t a_ = (a); \
10315 int16x8_t result = vcombine_s16 \
10316 (a_, vcreate_s16 \
10317 (__AARCH64_UINT64_C (0x0))); \
10318 __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
10319 : "+w"(result) \
10320 : "w"(b_), "i"(c) \
10321 : /* No clobbers */); \
10322 result; \
10323 })
10324
10325 #define vqrshrn_high_n_s64(a, b, c) \
10326 __extension__ \
10327 ({ \
10328 int64x2_t b_ = (b); \
10329 int32x2_t a_ = (a); \
10330 int32x4_t result = vcombine_s32 \
10331 (a_, vcreate_s32 \
10332 (__AARCH64_UINT64_C (0x0))); \
10333 __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
10334 : "+w"(result) \
10335 : "w"(b_), "i"(c) \
10336 : /* No clobbers */); \
10337 result; \
10338 })
10339
10340 #define vqrshrn_high_n_u16(a, b, c) \
10341 __extension__ \
10342 ({ \
10343 uint16x8_t b_ = (b); \
10344 uint8x8_t a_ = (a); \
10345 uint8x16_t result = vcombine_u8 \
10346 (a_, vcreate_u8 \
10347 (__AARCH64_UINT64_C (0x0))); \
10348 __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
10349 : "+w"(result) \
10350 : "w"(b_), "i"(c) \
10351 : /* No clobbers */); \
10352 result; \
10353 })
10354
10355 #define vqrshrn_high_n_u32(a, b, c) \
10356 __extension__ \
10357 ({ \
10358 uint32x4_t b_ = (b); \
10359 uint16x4_t a_ = (a); \
10360 uint16x8_t result = vcombine_u16 \
10361 (a_, vcreate_u16 \
10362 (__AARCH64_UINT64_C (0x0))); \
10363 __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
10364 : "+w"(result) \
10365 : "w"(b_), "i"(c) \
10366 : /* No clobbers */); \
10367 result; \
10368 })
10369
10370 #define vqrshrn_high_n_u64(a, b, c) \
10371 __extension__ \
10372 ({ \
10373 uint64x2_t b_ = (b); \
10374 uint32x2_t a_ = (a); \
10375 uint32x4_t result = vcombine_u32 \
10376 (a_, vcreate_u32 \
10377 (__AARCH64_UINT64_C (0x0))); \
10378 __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
10379 : "+w"(result) \
10380 : "w"(b_), "i"(c) \
10381 : /* No clobbers */); \
10382 result; \
10383 })
10384
10385 #define vqrshrun_high_n_s16(a, b, c) \
10386 __extension__ \
10387 ({ \
10388 int16x8_t b_ = (b); \
10389 uint8x8_t a_ = (a); \
10390 uint8x16_t result = vcombine_u8 \
10391 (a_, vcreate_u8 \
10392 (__AARCH64_UINT64_C (0x0))); \
10393 __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
10394 : "+w"(result) \
10395 : "w"(b_), "i"(c) \
10396 : /* No clobbers */); \
10397 result; \
10398 })
10399
10400 #define vqrshrun_high_n_s32(a, b, c) \
10401 __extension__ \
10402 ({ \
10403 int32x4_t b_ = (b); \
10404 uint16x4_t a_ = (a); \
10405 uint16x8_t result = vcombine_u16 \
10406 (a_, vcreate_u16 \
10407 (__AARCH64_UINT64_C (0x0))); \
10408 __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
10409 : "+w"(result) \
10410 : "w"(b_), "i"(c) \
10411 : /* No clobbers */); \
10412 result; \
10413 })
10414
10415 #define vqrshrun_high_n_s64(a, b, c) \
10416 __extension__ \
10417 ({ \
10418 int64x2_t b_ = (b); \
10419 uint32x2_t a_ = (a); \
10420 uint32x4_t result = vcombine_u32 \
10421 (a_, vcreate_u32 \
10422 (__AARCH64_UINT64_C (0x0))); \
10423 __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
10424 : "+w"(result) \
10425 : "w"(b_), "i"(c) \
10426 : /* No clobbers */); \
10427 result; \
10428 })
10429
10430 #define vqshrn_high_n_s16(a, b, c) \
10431 __extension__ \
10432 ({ \
10433 int16x8_t b_ = (b); \
10434 int8x8_t a_ = (a); \
10435 int8x16_t result = vcombine_s8 \
10436 (a_, vcreate_s8 \
10437 (__AARCH64_UINT64_C (0x0))); \
10438 __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
10439 : "+w"(result) \
10440 : "w"(b_), "i"(c) \
10441 : /* No clobbers */); \
10442 result; \
10443 })
10444
10445 #define vqshrn_high_n_s32(a, b, c) \
10446 __extension__ \
10447 ({ \
10448 int32x4_t b_ = (b); \
10449 int16x4_t a_ = (a); \
10450 int16x8_t result = vcombine_s16 \
10451 (a_, vcreate_s16 \
10452 (__AARCH64_UINT64_C (0x0))); \
10453 __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
10454 : "+w"(result) \
10455 : "w"(b_), "i"(c) \
10456 : /* No clobbers */); \
10457 result; \
10458 })
10459
10460 #define vqshrn_high_n_s64(a, b, c) \
10461 __extension__ \
10462 ({ \
10463 int64x2_t b_ = (b); \
10464 int32x2_t a_ = (a); \
10465 int32x4_t result = vcombine_s32 \
10466 (a_, vcreate_s32 \
10467 (__AARCH64_UINT64_C (0x0))); \
10468 __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
10469 : "+w"(result) \
10470 : "w"(b_), "i"(c) \
10471 : /* No clobbers */); \
10472 result; \
10473 })
10474
10475 #define vqshrn_high_n_u16(a, b, c) \
10476 __extension__ \
10477 ({ \
10478 uint16x8_t b_ = (b); \
10479 uint8x8_t a_ = (a); \
10480 uint8x16_t result = vcombine_u8 \
10481 (a_, vcreate_u8 \
10482 (__AARCH64_UINT64_C (0x0))); \
10483 __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
10484 : "+w"(result) \
10485 : "w"(b_), "i"(c) \
10486 : /* No clobbers */); \
10487 result; \
10488 })
10489
10490 #define vqshrn_high_n_u32(a, b, c) \
10491 __extension__ \
10492 ({ \
10493 uint32x4_t b_ = (b); \
10494 uint16x4_t a_ = (a); \
10495 uint16x8_t result = vcombine_u16 \
10496 (a_, vcreate_u16 \
10497 (__AARCH64_UINT64_C (0x0))); \
10498 __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
10499 : "+w"(result) \
10500 : "w"(b_), "i"(c) \
10501 : /* No clobbers */); \
10502 result; \
10503 })
10504
10505 #define vqshrn_high_n_u64(a, b, c) \
10506 __extension__ \
10507 ({ \
10508 uint64x2_t b_ = (b); \
10509 uint32x2_t a_ = (a); \
10510 uint32x4_t result = vcombine_u32 \
10511 (a_, vcreate_u32 \
10512 (__AARCH64_UINT64_C (0x0))); \
10513 __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
10514 : "+w"(result) \
10515 : "w"(b_), "i"(c) \
10516 : /* No clobbers */); \
10517 result; \
10518 })
10519
10520 #define vqshrun_high_n_s16(a, b, c) \
10521 __extension__ \
10522 ({ \
10523 int16x8_t b_ = (b); \
10524 uint8x8_t a_ = (a); \
10525 uint8x16_t result = vcombine_u8 \
10526 (a_, vcreate_u8 \
10527 (__AARCH64_UINT64_C (0x0))); \
10528 __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
10529 : "+w"(result) \
10530 : "w"(b_), "i"(c) \
10531 : /* No clobbers */); \
10532 result; \
10533 })
10534
10535 #define vqshrun_high_n_s32(a, b, c) \
10536 __extension__ \
10537 ({ \
10538 int32x4_t b_ = (b); \
10539 uint16x4_t a_ = (a); \
10540 uint16x8_t result = vcombine_u16 \
10541 (a_, vcreate_u16 \
10542 (__AARCH64_UINT64_C (0x0))); \
10543 __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
10544 : "+w"(result) \
10545 : "w"(b_), "i"(c) \
10546 : /* No clobbers */); \
10547 result; \
10548 })
10549
10550 #define vqshrun_high_n_s64(a, b, c) \
10551 __extension__ \
10552 ({ \
10553 int64x2_t b_ = (b); \
10554 uint32x2_t a_ = (a); \
10555 uint32x4_t result = vcombine_u32 \
10556 (a_, vcreate_u32 \
10557 (__AARCH64_UINT64_C (0x0))); \
10558 __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
10559 : "+w"(result) \
10560 : "w"(b_), "i"(c) \
10561 : /* No clobbers */); \
10562 result; \
10563 })
10564
10565 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10566 vrbit_s8 (int8x8_t a)
10567 {
10568 int8x8_t result;
10569 __asm__ ("rbit %0.8b,%1.8b"
10570 : "=w"(result)
10571 : "w"(a)
10572 : /* No clobbers */);
10573 return result;
10574 }
10575
10576 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10577 vrbit_u8 (uint8x8_t a)
10578 {
10579 uint8x8_t result;
10580 __asm__ ("rbit %0.8b,%1.8b"
10581 : "=w"(result)
10582 : "w"(a)
10583 : /* No clobbers */);
10584 return result;
10585 }
10586
10587 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10588 vrbitq_s8 (int8x16_t a)
10589 {
10590 int8x16_t result;
10591 __asm__ ("rbit %0.16b,%1.16b"
10592 : "=w"(result)
10593 : "w"(a)
10594 : /* No clobbers */);
10595 return result;
10596 }
10597
10598 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10599 vrbitq_u8 (uint8x16_t a)
10600 {
10601 uint8x16_t result;
10602 __asm__ ("rbit %0.16b,%1.16b"
10603 : "=w"(result)
10604 : "w"(a)
10605 : /* No clobbers */);
10606 return result;
10607 }
10608
10609 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10610 vrecpe_u32 (uint32x2_t a)
10611 {
10612 uint32x2_t result;
10613 __asm__ ("urecpe %0.2s,%1.2s"
10614 : "=w"(result)
10615 : "w"(a)
10616 : /* No clobbers */);
10617 return result;
10618 }
10619
10620 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10621 vrecpeq_u32 (uint32x4_t a)
10622 {
10623 uint32x4_t result;
10624 __asm__ ("urecpe %0.4s,%1.4s"
10625 : "=w"(result)
10626 : "w"(a)
10627 : /* No clobbers */);
10628 return result;
10629 }
10630
10631 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10632 vrev16_p8 (poly8x8_t a)
10633 {
10634 poly8x8_t result;
10635 __asm__ ("rev16 %0.8b,%1.8b"
10636 : "=w"(result)
10637 : "w"(a)
10638 : /* No clobbers */);
10639 return result;
10640 }
10641
10642 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10643 vrev16_s8 (int8x8_t a)
10644 {
10645 int8x8_t result;
10646 __asm__ ("rev16 %0.8b,%1.8b"
10647 : "=w"(result)
10648 : "w"(a)
10649 : /* No clobbers */);
10650 return result;
10651 }
10652
10653 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10654 vrev16_u8 (uint8x8_t a)
10655 {
10656 uint8x8_t result;
10657 __asm__ ("rev16 %0.8b,%1.8b"
10658 : "=w"(result)
10659 : "w"(a)
10660 : /* No clobbers */);
10661 return result;
10662 }
10663
10664 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10665 vrev16q_p8 (poly8x16_t a)
10666 {
10667 poly8x16_t result;
10668 __asm__ ("rev16 %0.16b,%1.16b"
10669 : "=w"(result)
10670 : "w"(a)
10671 : /* No clobbers */);
10672 return result;
10673 }
10674
10675 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10676 vrev16q_s8 (int8x16_t a)
10677 {
10678 int8x16_t result;
10679 __asm__ ("rev16 %0.16b,%1.16b"
10680 : "=w"(result)
10681 : "w"(a)
10682 : /* No clobbers */);
10683 return result;
10684 }
10685
10686 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10687 vrev16q_u8 (uint8x16_t a)
10688 {
10689 uint8x16_t result;
10690 __asm__ ("rev16 %0.16b,%1.16b"
10691 : "=w"(result)
10692 : "w"(a)
10693 : /* No clobbers */);
10694 return result;
10695 }
10696
10697 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10698 vrev32_p8 (poly8x8_t a)
10699 {
10700 poly8x8_t result;
10701 __asm__ ("rev32 %0.8b,%1.8b"
10702 : "=w"(result)
10703 : "w"(a)
10704 : /* No clobbers */);
10705 return result;
10706 }
10707
10708 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
10709 vrev32_p16 (poly16x4_t a)
10710 {
10711 poly16x4_t result;
10712 __asm__ ("rev32 %0.4h,%1.4h"
10713 : "=w"(result)
10714 : "w"(a)
10715 : /* No clobbers */);
10716 return result;
10717 }
10718
10719 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10720 vrev32_s8 (int8x8_t a)
10721 {
10722 int8x8_t result;
10723 __asm__ ("rev32 %0.8b,%1.8b"
10724 : "=w"(result)
10725 : "w"(a)
10726 : /* No clobbers */);
10727 return result;
10728 }
10729
10730 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10731 vrev32_s16 (int16x4_t a)
10732 {
10733 int16x4_t result;
10734 __asm__ ("rev32 %0.4h,%1.4h"
10735 : "=w"(result)
10736 : "w"(a)
10737 : /* No clobbers */);
10738 return result;
10739 }
10740
10741 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10742 vrev32_u8 (uint8x8_t a)
10743 {
10744 uint8x8_t result;
10745 __asm__ ("rev32 %0.8b,%1.8b"
10746 : "=w"(result)
10747 : "w"(a)
10748 : /* No clobbers */);
10749 return result;
10750 }
10751
10752 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10753 vrev32_u16 (uint16x4_t a)
10754 {
10755 uint16x4_t result;
10756 __asm__ ("rev32 %0.4h,%1.4h"
10757 : "=w"(result)
10758 : "w"(a)
10759 : /* No clobbers */);
10760 return result;
10761 }
10762
10763 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10764 vrev32q_p8 (poly8x16_t a)
10765 {
10766 poly8x16_t result;
10767 __asm__ ("rev32 %0.16b,%1.16b"
10768 : "=w"(result)
10769 : "w"(a)
10770 : /* No clobbers */);
10771 return result;
10772 }
10773
10774 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10775 vrev32q_p16 (poly16x8_t a)
10776 {
10777 poly16x8_t result;
10778 __asm__ ("rev32 %0.8h,%1.8h"
10779 : "=w"(result)
10780 : "w"(a)
10781 : /* No clobbers */);
10782 return result;
10783 }
10784
10785 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10786 vrev32q_s8 (int8x16_t a)
10787 {
10788 int8x16_t result;
10789 __asm__ ("rev32 %0.16b,%1.16b"
10790 : "=w"(result)
10791 : "w"(a)
10792 : /* No clobbers */);
10793 return result;
10794 }
10795
10796 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10797 vrev32q_s16 (int16x8_t a)
10798 {
10799 int16x8_t result;
10800 __asm__ ("rev32 %0.8h,%1.8h"
10801 : "=w"(result)
10802 : "w"(a)
10803 : /* No clobbers */);
10804 return result;
10805 }
10806
10807 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10808 vrev32q_u8 (uint8x16_t a)
10809 {
10810 uint8x16_t result;
10811 __asm__ ("rev32 %0.16b,%1.16b"
10812 : "=w"(result)
10813 : "w"(a)
10814 : /* No clobbers */);
10815 return result;
10816 }
10817
10818 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10819 vrev32q_u16 (uint16x8_t a)
10820 {
10821 uint16x8_t result;
10822 __asm__ ("rev32 %0.8h,%1.8h"
10823 : "=w"(result)
10824 : "w"(a)
10825 : /* No clobbers */);
10826 return result;
10827 }
10828
10829 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10830 vrev64_f32 (float32x2_t a)
10831 {
10832 float32x2_t result;
10833 __asm__ ("rev64 %0.2s,%1.2s"
10834 : "=w"(result)
10835 : "w"(a)
10836 : /* No clobbers */);
10837 return result;
10838 }
10839
10840 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10841 vrev64_p8 (poly8x8_t a)
10842 {
10843 poly8x8_t result;
10844 __asm__ ("rev64 %0.8b,%1.8b"
10845 : "=w"(result)
10846 : "w"(a)
10847 : /* No clobbers */);
10848 return result;
10849 }
10850
10851 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
10852 vrev64_p16 (poly16x4_t a)
10853 {
10854 poly16x4_t result;
10855 __asm__ ("rev64 %0.4h,%1.4h"
10856 : "=w"(result)
10857 : "w"(a)
10858 : /* No clobbers */);
10859 return result;
10860 }
10861
10862 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10863 vrev64_s8 (int8x8_t a)
10864 {
10865 int8x8_t result;
10866 __asm__ ("rev64 %0.8b,%1.8b"
10867 : "=w"(result)
10868 : "w"(a)
10869 : /* No clobbers */);
10870 return result;
10871 }
10872
10873 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10874 vrev64_s16 (int16x4_t a)
10875 {
10876 int16x4_t result;
10877 __asm__ ("rev64 %0.4h,%1.4h"
10878 : "=w"(result)
10879 : "w"(a)
10880 : /* No clobbers */);
10881 return result;
10882 }
10883
10884 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10885 vrev64_s32 (int32x2_t a)
10886 {
10887 int32x2_t result;
10888 __asm__ ("rev64 %0.2s,%1.2s"
10889 : "=w"(result)
10890 : "w"(a)
10891 : /* No clobbers */);
10892 return result;
10893 }
10894
10895 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10896 vrev64_u8 (uint8x8_t a)
10897 {
10898 uint8x8_t result;
10899 __asm__ ("rev64 %0.8b,%1.8b"
10900 : "=w"(result)
10901 : "w"(a)
10902 : /* No clobbers */);
10903 return result;
10904 }
10905
10906 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10907 vrev64_u16 (uint16x4_t a)
10908 {
10909 uint16x4_t result;
10910 __asm__ ("rev64 %0.4h,%1.4h"
10911 : "=w"(result)
10912 : "w"(a)
10913 : /* No clobbers */);
10914 return result;
10915 }
10916
10917 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10918 vrev64_u32 (uint32x2_t a)
10919 {
10920 uint32x2_t result;
10921 __asm__ ("rev64 %0.2s,%1.2s"
10922 : "=w"(result)
10923 : "w"(a)
10924 : /* No clobbers */);
10925 return result;
10926 }
10927
10928 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10929 vrev64q_f32 (float32x4_t a)
10930 {
10931 float32x4_t result;
10932 __asm__ ("rev64 %0.4s,%1.4s"
10933 : "=w"(result)
10934 : "w"(a)
10935 : /* No clobbers */);
10936 return result;
10937 }
10938
10939 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10940 vrev64q_p8 (poly8x16_t a)
10941 {
10942 poly8x16_t result;
10943 __asm__ ("rev64 %0.16b,%1.16b"
10944 : "=w"(result)
10945 : "w"(a)
10946 : /* No clobbers */);
10947 return result;
10948 }
10949
10950 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10951 vrev64q_p16 (poly16x8_t a)
10952 {
10953 poly16x8_t result;
10954 __asm__ ("rev64 %0.8h,%1.8h"
10955 : "=w"(result)
10956 : "w"(a)
10957 : /* No clobbers */);
10958 return result;
10959 }
10960
10961 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10962 vrev64q_s8 (int8x16_t a)
10963 {
10964 int8x16_t result;
10965 __asm__ ("rev64 %0.16b,%1.16b"
10966 : "=w"(result)
10967 : "w"(a)
10968 : /* No clobbers */);
10969 return result;
10970 }
10971
10972 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10973 vrev64q_s16 (int16x8_t a)
10974 {
10975 int16x8_t result;
10976 __asm__ ("rev64 %0.8h,%1.8h"
10977 : "=w"(result)
10978 : "w"(a)
10979 : /* No clobbers */);
10980 return result;
10981 }
10982
10983 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10984 vrev64q_s32 (int32x4_t a)
10985 {
10986 int32x4_t result;
10987 __asm__ ("rev64 %0.4s,%1.4s"
10988 : "=w"(result)
10989 : "w"(a)
10990 : /* No clobbers */);
10991 return result;
10992 }
10993
10994 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10995 vrev64q_u8 (uint8x16_t a)
10996 {
10997 uint8x16_t result;
10998 __asm__ ("rev64 %0.16b,%1.16b"
10999 : "=w"(result)
11000 : "w"(a)
11001 : /* No clobbers */);
11002 return result;
11003 }
11004
11005 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11006 vrev64q_u16 (uint16x8_t a)
11007 {
11008 uint16x8_t result;
11009 __asm__ ("rev64 %0.8h,%1.8h"
11010 : "=w"(result)
11011 : "w"(a)
11012 : /* No clobbers */);
11013 return result;
11014 }
11015
11016 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11017 vrev64q_u32 (uint32x4_t a)
11018 {
11019 uint32x4_t result;
11020 __asm__ ("rev64 %0.4s,%1.4s"
11021 : "=w"(result)
11022 : "w"(a)
11023 : /* No clobbers */);
11024 return result;
11025 }
11026
11027 #define vrshrn_high_n_s16(a, b, c) \
11028 __extension__ \
11029 ({ \
11030 int16x8_t b_ = (b); \
11031 int8x8_t a_ = (a); \
11032 int8x16_t result = vcombine_s8 \
11033 (a_, vcreate_s8 \
11034 (__AARCH64_UINT64_C (0x0))); \
11035 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
11036 : "+w"(result) \
11037 : "w"(b_), "i"(c) \
11038 : /* No clobbers */); \
11039 result; \
11040 })
11041
11042 #define vrshrn_high_n_s32(a, b, c) \
11043 __extension__ \
11044 ({ \
11045 int32x4_t b_ = (b); \
11046 int16x4_t a_ = (a); \
11047 int16x8_t result = vcombine_s16 \
11048 (a_, vcreate_s16 \
11049 (__AARCH64_UINT64_C (0x0))); \
11050 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
11051 : "+w"(result) \
11052 : "w"(b_), "i"(c) \
11053 : /* No clobbers */); \
11054 result; \
11055 })
11056
11057 #define vrshrn_high_n_s64(a, b, c) \
11058 __extension__ \
11059 ({ \
11060 int64x2_t b_ = (b); \
11061 int32x2_t a_ = (a); \
11062 int32x4_t result = vcombine_s32 \
11063 (a_, vcreate_s32 \
11064 (__AARCH64_UINT64_C (0x0))); \
11065 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
11066 : "+w"(result) \
11067 : "w"(b_), "i"(c) \
11068 : /* No clobbers */); \
11069 result; \
11070 })
11071
11072 #define vrshrn_high_n_u16(a, b, c) \
11073 __extension__ \
11074 ({ \
11075 uint16x8_t b_ = (b); \
11076 uint8x8_t a_ = (a); \
11077 uint8x16_t result = vcombine_u8 \
11078 (a_, vcreate_u8 \
11079 (__AARCH64_UINT64_C (0x0))); \
11080 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
11081 : "+w"(result) \
11082 : "w"(b_), "i"(c) \
11083 : /* No clobbers */); \
11084 result; \
11085 })
11086
11087 #define vrshrn_high_n_u32(a, b, c) \
11088 __extension__ \
11089 ({ \
11090 uint32x4_t b_ = (b); \
11091 uint16x4_t a_ = (a); \
11092 uint16x8_t result = vcombine_u16 \
11093 (a_, vcreate_u16 \
11094 (__AARCH64_UINT64_C (0x0))); \
11095 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
11096 : "+w"(result) \
11097 : "w"(b_), "i"(c) \
11098 : /* No clobbers */); \
11099 result; \
11100 })
11101
11102 #define vrshrn_high_n_u64(a, b, c) \
11103 __extension__ \
11104 ({ \
11105 uint64x2_t b_ = (b); \
11106 uint32x2_t a_ = (a); \
11107 uint32x4_t result = vcombine_u32 \
11108 (a_, vcreate_u32 \
11109 (__AARCH64_UINT64_C (0x0))); \
11110 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
11111 : "+w"(result) \
11112 : "w"(b_), "i"(c) \
11113 : /* No clobbers */); \
11114 result; \
11115 })
11116
11117 #define vrshrn_n_s16(a, b) \
11118 __extension__ \
11119 ({ \
11120 int16x8_t a_ = (a); \
11121 int8x8_t result; \
11122 __asm__ ("rshrn %0.8b,%1.8h,%2" \
11123 : "=w"(result) \
11124 : "w"(a_), "i"(b) \
11125 : /* No clobbers */); \
11126 result; \
11127 })
11128
11129 #define vrshrn_n_s32(a, b) \
11130 __extension__ \
11131 ({ \
11132 int32x4_t a_ = (a); \
11133 int16x4_t result; \
11134 __asm__ ("rshrn %0.4h,%1.4s,%2" \
11135 : "=w"(result) \
11136 : "w"(a_), "i"(b) \
11137 : /* No clobbers */); \
11138 result; \
11139 })
11140
11141 #define vrshrn_n_s64(a, b) \
11142 __extension__ \
11143 ({ \
11144 int64x2_t a_ = (a); \
11145 int32x2_t result; \
11146 __asm__ ("rshrn %0.2s,%1.2d,%2" \
11147 : "=w"(result) \
11148 : "w"(a_), "i"(b) \
11149 : /* No clobbers */); \
11150 result; \
11151 })
11152
11153 #define vrshrn_n_u16(a, b) \
11154 __extension__ \
11155 ({ \
11156 uint16x8_t a_ = (a); \
11157 uint8x8_t result; \
11158 __asm__ ("rshrn %0.8b,%1.8h,%2" \
11159 : "=w"(result) \
11160 : "w"(a_), "i"(b) \
11161 : /* No clobbers */); \
11162 result; \
11163 })
11164
11165 #define vrshrn_n_u32(a, b) \
11166 __extension__ \
11167 ({ \
11168 uint32x4_t a_ = (a); \
11169 uint16x4_t result; \
11170 __asm__ ("rshrn %0.4h,%1.4s,%2" \
11171 : "=w"(result) \
11172 : "w"(a_), "i"(b) \
11173 : /* No clobbers */); \
11174 result; \
11175 })
11176
11177 #define vrshrn_n_u64(a, b) \
11178 __extension__ \
11179 ({ \
11180 uint64x2_t a_ = (a); \
11181 uint32x2_t result; \
11182 __asm__ ("rshrn %0.2s,%1.2d,%2" \
11183 : "=w"(result) \
11184 : "w"(a_), "i"(b) \
11185 : /* No clobbers */); \
11186 result; \
11187 })
11188
11189 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11190 vrsqrte_f32 (float32x2_t a)
11191 {
11192 float32x2_t result;
11193 __asm__ ("frsqrte %0.2s,%1.2s"
11194 : "=w"(result)
11195 : "w"(a)
11196 : /* No clobbers */);
11197 return result;
11198 }
11199
11200 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
11201 vrsqrte_f64 (float64x1_t a)
11202 {
11203 float64x1_t result;
11204 __asm__ ("frsqrte %d0,%d1"
11205 : "=w"(result)
11206 : "w"(a)
11207 : /* No clobbers */);
11208 return result;
11209 }
11210
11211 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11212 vrsqrte_u32 (uint32x2_t a)
11213 {
11214 uint32x2_t result;
11215 __asm__ ("ursqrte %0.2s,%1.2s"
11216 : "=w"(result)
11217 : "w"(a)
11218 : /* No clobbers */);
11219 return result;
11220 }
11221
11222 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11223 vrsqrted_f64 (float64_t a)
11224 {
11225 float64_t result;
11226 __asm__ ("frsqrte %d0,%d1"
11227 : "=w"(result)
11228 : "w"(a)
11229 : /* No clobbers */);
11230 return result;
11231 }
11232
11233 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11234 vrsqrteq_f32 (float32x4_t a)
11235 {
11236 float32x4_t result;
11237 __asm__ ("frsqrte %0.4s,%1.4s"
11238 : "=w"(result)
11239 : "w"(a)
11240 : /* No clobbers */);
11241 return result;
11242 }
11243
11244 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11245 vrsqrteq_f64 (float64x2_t a)
11246 {
11247 float64x2_t result;
11248 __asm__ ("frsqrte %0.2d,%1.2d"
11249 : "=w"(result)
11250 : "w"(a)
11251 : /* No clobbers */);
11252 return result;
11253 }
11254
11255 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11256 vrsqrteq_u32 (uint32x4_t a)
11257 {
11258 uint32x4_t result;
11259 __asm__ ("ursqrte %0.4s,%1.4s"
11260 : "=w"(result)
11261 : "w"(a)
11262 : /* No clobbers */);
11263 return result;
11264 }
11265
11266 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11267 vrsqrtes_f32 (float32_t a)
11268 {
11269 float32_t result;
11270 __asm__ ("frsqrte %s0,%s1"
11271 : "=w"(result)
11272 : "w"(a)
11273 : /* No clobbers */);
11274 return result;
11275 }
11276
11277 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11278 vrsqrts_f32 (float32x2_t a, float32x2_t b)
11279 {
11280 float32x2_t result;
11281 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
11282 : "=w"(result)
11283 : "w"(a), "w"(b)
11284 : /* No clobbers */);
11285 return result;
11286 }
11287
11288 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11289 vrsqrtsd_f64 (float64_t a, float64_t b)
11290 {
11291 float64_t result;
11292 __asm__ ("frsqrts %d0,%d1,%d2"
11293 : "=w"(result)
11294 : "w"(a), "w"(b)
11295 : /* No clobbers */);
11296 return result;
11297 }
11298
11299 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11300 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
11301 {
11302 float32x4_t result;
11303 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
11304 : "=w"(result)
11305 : "w"(a), "w"(b)
11306 : /* No clobbers */);
11307 return result;
11308 }
11309
11310 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11311 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
11312 {
11313 float64x2_t result;
11314 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11315 : "=w"(result)
11316 : "w"(a), "w"(b)
11317 : /* No clobbers */);
11318 return result;
11319 }
11320
11321 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11322 vrsqrtss_f32 (float32_t a, float32_t b)
11323 {
11324 float32_t result;
11325 __asm__ ("frsqrts %s0,%s1,%s2"
11326 : "=w"(result)
11327 : "w"(a), "w"(b)
11328 : /* No clobbers */);
11329 return result;
11330 }
11331
11332 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11333 vrsrtsq_f64 (float64x2_t a, float64x2_t b)
11334 {
11335 float64x2_t result;
11336 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11337 : "=w"(result)
11338 : "w"(a), "w"(b)
11339 : /* No clobbers */);
11340 return result;
11341 }
11342
11343 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11344 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
11345 {
11346 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11347 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11348 : "+w"(result)
11349 : "w"(b), "w"(c)
11350 : /* No clobbers */);
11351 return result;
11352 }
11353
11354 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11355 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
11356 {
11357 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
11358 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11359 : "+w"(result)
11360 : "w"(b), "w"(c)
11361 : /* No clobbers */);
11362 return result;
11363 }
11364
11365 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11366 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
11367 {
11368 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
11369 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11370 : "+w"(result)
11371 : "w"(b), "w"(c)
11372 : /* No clobbers */);
11373 return result;
11374 }
11375
11376 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11377 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
11378 {
11379 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11380 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11381 : "+w"(result)
11382 : "w"(b), "w"(c)
11383 : /* No clobbers */);
11384 return result;
11385 }
11386
11387 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11388 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
11389 {
11390 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11391 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11392 : "+w"(result)
11393 : "w"(b), "w"(c)
11394 : /* No clobbers */);
11395 return result;
11396 }
11397
11398 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11399 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
11400 {
11401 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11402 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11403 : "+w"(result)
11404 : "w"(b), "w"(c)
11405 : /* No clobbers */);
11406 return result;
11407 }
11408
11409 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11410 vrsubhn_s16 (int16x8_t a, int16x8_t b)
11411 {
11412 int8x8_t result;
11413 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11414 : "=w"(result)
11415 : "w"(a), "w"(b)
11416 : /* No clobbers */);
11417 return result;
11418 }
11419
11420 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11421 vrsubhn_s32 (int32x4_t a, int32x4_t b)
11422 {
11423 int16x4_t result;
11424 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11425 : "=w"(result)
11426 : "w"(a), "w"(b)
11427 : /* No clobbers */);
11428 return result;
11429 }
11430
11431 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11432 vrsubhn_s64 (int64x2_t a, int64x2_t b)
11433 {
11434 int32x2_t result;
11435 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11436 : "=w"(result)
11437 : "w"(a), "w"(b)
11438 : /* No clobbers */);
11439 return result;
11440 }
11441
11442 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11443 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
11444 {
11445 uint8x8_t result;
11446 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11447 : "=w"(result)
11448 : "w"(a), "w"(b)
11449 : /* No clobbers */);
11450 return result;
11451 }
11452
11453 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11454 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
11455 {
11456 uint16x4_t result;
11457 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11458 : "=w"(result)
11459 : "w"(a), "w"(b)
11460 : /* No clobbers */);
11461 return result;
11462 }
11463
11464 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11465 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
11466 {
11467 uint32x2_t result;
11468 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11469 : "=w"(result)
11470 : "w"(a), "w"(b)
11471 : /* No clobbers */);
11472 return result;
11473 }
11474
11475 #define vset_lane_f32(a, b, c) \
11476 __extension__ \
11477 ({ \
11478 float32x2_t b_ = (b); \
11479 float32_t a_ = (a); \
11480 float32x2_t result; \
11481 __asm__ ("ins %0.s[%3], %w1" \
11482 : "=w"(result) \
11483 : "r"(a_), "0"(b_), "i"(c) \
11484 : /* No clobbers */); \
11485 result; \
11486 })
11487
11488 #define vset_lane_f64(a, b, c) \
11489 __extension__ \
11490 ({ \
11491 float64x1_t b_ = (b); \
11492 float64_t a_ = (a); \
11493 float64x1_t result; \
11494 __asm__ ("ins %0.d[%3], %x1" \
11495 : "=w"(result) \
11496 : "r"(a_), "0"(b_), "i"(c) \
11497 : /* No clobbers */); \
11498 result; \
11499 })
11500
11501 #define vset_lane_p8(a, b, c) \
11502 __extension__ \
11503 ({ \
11504 poly8x8_t b_ = (b); \
11505 poly8_t a_ = (a); \
11506 poly8x8_t result; \
11507 __asm__ ("ins %0.b[%3], %w1" \
11508 : "=w"(result) \
11509 : "r"(a_), "0"(b_), "i"(c) \
11510 : /* No clobbers */); \
11511 result; \
11512 })
11513
11514 #define vset_lane_p16(a, b, c) \
11515 __extension__ \
11516 ({ \
11517 poly16x4_t b_ = (b); \
11518 poly16_t a_ = (a); \
11519 poly16x4_t result; \
11520 __asm__ ("ins %0.h[%3], %w1" \
11521 : "=w"(result) \
11522 : "r"(a_), "0"(b_), "i"(c) \
11523 : /* No clobbers */); \
11524 result; \
11525 })
11526
11527 #define vset_lane_s8(a, b, c) \
11528 __extension__ \
11529 ({ \
11530 int8x8_t b_ = (b); \
11531 int8_t a_ = (a); \
11532 int8x8_t result; \
11533 __asm__ ("ins %0.b[%3], %w1" \
11534 : "=w"(result) \
11535 : "r"(a_), "0"(b_), "i"(c) \
11536 : /* No clobbers */); \
11537 result; \
11538 })
11539
11540 #define vset_lane_s16(a, b, c) \
11541 __extension__ \
11542 ({ \
11543 int16x4_t b_ = (b); \
11544 int16_t a_ = (a); \
11545 int16x4_t result; \
11546 __asm__ ("ins %0.h[%3], %w1" \
11547 : "=w"(result) \
11548 : "r"(a_), "0"(b_), "i"(c) \
11549 : /* No clobbers */); \
11550 result; \
11551 })
11552
11553 #define vset_lane_s32(a, b, c) \
11554 __extension__ \
11555 ({ \
11556 int32x2_t b_ = (b); \
11557 int32_t a_ = (a); \
11558 int32x2_t result; \
11559 __asm__ ("ins %0.s[%3], %w1" \
11560 : "=w"(result) \
11561 : "r"(a_), "0"(b_), "i"(c) \
11562 : /* No clobbers */); \
11563 result; \
11564 })
11565
11566 #define vset_lane_s64(a, b, c) \
11567 __extension__ \
11568 ({ \
11569 int64x1_t b_ = (b); \
11570 int64_t a_ = (a); \
11571 int64x1_t result; \
11572 __asm__ ("ins %0.d[%3], %x1" \
11573 : "=w"(result) \
11574 : "r"(a_), "0"(b_), "i"(c) \
11575 : /* No clobbers */); \
11576 result; \
11577 })
11578
11579 #define vset_lane_u8(a, b, c) \
11580 __extension__ \
11581 ({ \
11582 uint8x8_t b_ = (b); \
11583 uint8_t a_ = (a); \
11584 uint8x8_t result; \
11585 __asm__ ("ins %0.b[%3], %w1" \
11586 : "=w"(result) \
11587 : "r"(a_), "0"(b_), "i"(c) \
11588 : /* No clobbers */); \
11589 result; \
11590 })
11591
11592 #define vset_lane_u16(a, b, c) \
11593 __extension__ \
11594 ({ \
11595 uint16x4_t b_ = (b); \
11596 uint16_t a_ = (a); \
11597 uint16x4_t result; \
11598 __asm__ ("ins %0.h[%3], %w1" \
11599 : "=w"(result) \
11600 : "r"(a_), "0"(b_), "i"(c) \
11601 : /* No clobbers */); \
11602 result; \
11603 })
11604
11605 #define vset_lane_u32(a, b, c) \
11606 __extension__ \
11607 ({ \
11608 uint32x2_t b_ = (b); \
11609 uint32_t a_ = (a); \
11610 uint32x2_t result; \
11611 __asm__ ("ins %0.s[%3], %w1" \
11612 : "=w"(result) \
11613 : "r"(a_), "0"(b_), "i"(c) \
11614 : /* No clobbers */); \
11615 result; \
11616 })
11617
11618 #define vset_lane_u64(a, b, c) \
11619 __extension__ \
11620 ({ \
11621 uint64x1_t b_ = (b); \
11622 uint64_t a_ = (a); \
11623 uint64x1_t result; \
11624 __asm__ ("ins %0.d[%3], %x1" \
11625 : "=w"(result) \
11626 : "r"(a_), "0"(b_), "i"(c) \
11627 : /* No clobbers */); \
11628 result; \
11629 })
11630
11631 #define vsetq_lane_f32(a, b, c) \
11632 __extension__ \
11633 ({ \
11634 float32x4_t b_ = (b); \
11635 float32_t a_ = (a); \
11636 float32x4_t result; \
11637 __asm__ ("ins %0.s[%3], %w1" \
11638 : "=w"(result) \
11639 : "r"(a_), "0"(b_), "i"(c) \
11640 : /* No clobbers */); \
11641 result; \
11642 })
11643
11644 #define vsetq_lane_f64(a, b, c) \
11645 __extension__ \
11646 ({ \
11647 float64x2_t b_ = (b); \
11648 float64_t a_ = (a); \
11649 float64x2_t result; \
11650 __asm__ ("ins %0.d[%3], %x1" \
11651 : "=w"(result) \
11652 : "r"(a_), "0"(b_), "i"(c) \
11653 : /* No clobbers */); \
11654 result; \
11655 })
11656
11657 #define vsetq_lane_p8(a, b, c) \
11658 __extension__ \
11659 ({ \
11660 poly8x16_t b_ = (b); \
11661 poly8_t a_ = (a); \
11662 poly8x16_t result; \
11663 __asm__ ("ins %0.b[%3], %w1" \
11664 : "=w"(result) \
11665 : "r"(a_), "0"(b_), "i"(c) \
11666 : /* No clobbers */); \
11667 result; \
11668 })
11669
11670 #define vsetq_lane_p16(a, b, c) \
11671 __extension__ \
11672 ({ \
11673 poly16x8_t b_ = (b); \
11674 poly16_t a_ = (a); \
11675 poly16x8_t result; \
11676 __asm__ ("ins %0.h[%3], %w1" \
11677 : "=w"(result) \
11678 : "r"(a_), "0"(b_), "i"(c) \
11679 : /* No clobbers */); \
11680 result; \
11681 })
11682
11683 #define vsetq_lane_s8(a, b, c) \
11684 __extension__ \
11685 ({ \
11686 int8x16_t b_ = (b); \
11687 int8_t a_ = (a); \
11688 int8x16_t result; \
11689 __asm__ ("ins %0.b[%3], %w1" \
11690 : "=w"(result) \
11691 : "r"(a_), "0"(b_), "i"(c) \
11692 : /* No clobbers */); \
11693 result; \
11694 })
11695
11696 #define vsetq_lane_s16(a, b, c) \
11697 __extension__ \
11698 ({ \
11699 int16x8_t b_ = (b); \
11700 int16_t a_ = (a); \
11701 int16x8_t result; \
11702 __asm__ ("ins %0.h[%3], %w1" \
11703 : "=w"(result) \
11704 : "r"(a_), "0"(b_), "i"(c) \
11705 : /* No clobbers */); \
11706 result; \
11707 })
11708
11709 #define vsetq_lane_s32(a, b, c) \
11710 __extension__ \
11711 ({ \
11712 int32x4_t b_ = (b); \
11713 int32_t a_ = (a); \
11714 int32x4_t result; \
11715 __asm__ ("ins %0.s[%3], %w1" \
11716 : "=w"(result) \
11717 : "r"(a_), "0"(b_), "i"(c) \
11718 : /* No clobbers */); \
11719 result; \
11720 })
11721
11722 #define vsetq_lane_s64(a, b, c) \
11723 __extension__ \
11724 ({ \
11725 int64x2_t b_ = (b); \
11726 int64_t a_ = (a); \
11727 int64x2_t result; \
11728 __asm__ ("ins %0.d[%3], %x1" \
11729 : "=w"(result) \
11730 : "r"(a_), "0"(b_), "i"(c) \
11731 : /* No clobbers */); \
11732 result; \
11733 })
11734
11735 #define vsetq_lane_u8(a, b, c) \
11736 __extension__ \
11737 ({ \
11738 uint8x16_t b_ = (b); \
11739 uint8_t a_ = (a); \
11740 uint8x16_t result; \
11741 __asm__ ("ins %0.b[%3], %w1" \
11742 : "=w"(result) \
11743 : "r"(a_), "0"(b_), "i"(c) \
11744 : /* No clobbers */); \
11745 result; \
11746 })
11747
11748 #define vsetq_lane_u16(a, b, c) \
11749 __extension__ \
11750 ({ \
11751 uint16x8_t b_ = (b); \
11752 uint16_t a_ = (a); \
11753 uint16x8_t result; \
11754 __asm__ ("ins %0.h[%3], %w1" \
11755 : "=w"(result) \
11756 : "r"(a_), "0"(b_), "i"(c) \
11757 : /* No clobbers */); \
11758 result; \
11759 })
11760
11761 #define vsetq_lane_u32(a, b, c) \
11762 __extension__ \
11763 ({ \
11764 uint32x4_t b_ = (b); \
11765 uint32_t a_ = (a); \
11766 uint32x4_t result; \
11767 __asm__ ("ins %0.s[%3], %w1" \
11768 : "=w"(result) \
11769 : "r"(a_), "0"(b_), "i"(c) \
11770 : /* No clobbers */); \
11771 result; \
11772 })
11773
11774 #define vsetq_lane_u64(a, b, c) \
11775 __extension__ \
11776 ({ \
11777 uint64x2_t b_ = (b); \
11778 uint64_t a_ = (a); \
11779 uint64x2_t result; \
11780 __asm__ ("ins %0.d[%3], %x1" \
11781 : "=w"(result) \
11782 : "r"(a_), "0"(b_), "i"(c) \
11783 : /* No clobbers */); \
11784 result; \
11785 })
11786
11787 #define vshrn_high_n_s16(a, b, c) \
11788 __extension__ \
11789 ({ \
11790 int16x8_t b_ = (b); \
11791 int8x8_t a_ = (a); \
11792 int8x16_t result = vcombine_s8 \
11793 (a_, vcreate_s8 \
11794 (__AARCH64_UINT64_C (0x0))); \
11795 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
11796 : "+w"(result) \
11797 : "w"(b_), "i"(c) \
11798 : /* No clobbers */); \
11799 result; \
11800 })
11801
11802 #define vshrn_high_n_s32(a, b, c) \
11803 __extension__ \
11804 ({ \
11805 int32x4_t b_ = (b); \
11806 int16x4_t a_ = (a); \
11807 int16x8_t result = vcombine_s16 \
11808 (a_, vcreate_s16 \
11809 (__AARCH64_UINT64_C (0x0))); \
11810 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
11811 : "+w"(result) \
11812 : "w"(b_), "i"(c) \
11813 : /* No clobbers */); \
11814 result; \
11815 })
11816
11817 #define vshrn_high_n_s64(a, b, c) \
11818 __extension__ \
11819 ({ \
11820 int64x2_t b_ = (b); \
11821 int32x2_t a_ = (a); \
11822 int32x4_t result = vcombine_s32 \
11823 (a_, vcreate_s32 \
11824 (__AARCH64_UINT64_C (0x0))); \
11825 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
11826 : "+w"(result) \
11827 : "w"(b_), "i"(c) \
11828 : /* No clobbers */); \
11829 result; \
11830 })
11831
11832 #define vshrn_high_n_u16(a, b, c) \
11833 __extension__ \
11834 ({ \
11835 uint16x8_t b_ = (b); \
11836 uint8x8_t a_ = (a); \
11837 uint8x16_t result = vcombine_u8 \
11838 (a_, vcreate_u8 \
11839 (__AARCH64_UINT64_C (0x0))); \
11840 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
11841 : "+w"(result) \
11842 : "w"(b_), "i"(c) \
11843 : /* No clobbers */); \
11844 result; \
11845 })
11846
11847 #define vshrn_high_n_u32(a, b, c) \
11848 __extension__ \
11849 ({ \
11850 uint32x4_t b_ = (b); \
11851 uint16x4_t a_ = (a); \
11852 uint16x8_t result = vcombine_u16 \
11853 (a_, vcreate_u16 \
11854 (__AARCH64_UINT64_C (0x0))); \
11855 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
11856 : "+w"(result) \
11857 : "w"(b_), "i"(c) \
11858 : /* No clobbers */); \
11859 result; \
11860 })
11861
11862 #define vshrn_high_n_u64(a, b, c) \
11863 __extension__ \
11864 ({ \
11865 uint64x2_t b_ = (b); \
11866 uint32x2_t a_ = (a); \
11867 uint32x4_t result = vcombine_u32 \
11868 (a_, vcreate_u32 \
11869 (__AARCH64_UINT64_C (0x0))); \
11870 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
11871 : "+w"(result) \
11872 : "w"(b_), "i"(c) \
11873 : /* No clobbers */); \
11874 result; \
11875 })
11876
11877 #define vshrn_n_s16(a, b) \
11878 __extension__ \
11879 ({ \
11880 int16x8_t a_ = (a); \
11881 int8x8_t result; \
11882 __asm__ ("shrn %0.8b,%1.8h,%2" \
11883 : "=w"(result) \
11884 : "w"(a_), "i"(b) \
11885 : /* No clobbers */); \
11886 result; \
11887 })
11888
11889 #define vshrn_n_s32(a, b) \
11890 __extension__ \
11891 ({ \
11892 int32x4_t a_ = (a); \
11893 int16x4_t result; \
11894 __asm__ ("shrn %0.4h,%1.4s,%2" \
11895 : "=w"(result) \
11896 : "w"(a_), "i"(b) \
11897 : /* No clobbers */); \
11898 result; \
11899 })
11900
11901 #define vshrn_n_s64(a, b) \
11902 __extension__ \
11903 ({ \
11904 int64x2_t a_ = (a); \
11905 int32x2_t result; \
11906 __asm__ ("shrn %0.2s,%1.2d,%2" \
11907 : "=w"(result) \
11908 : "w"(a_), "i"(b) \
11909 : /* No clobbers */); \
11910 result; \
11911 })
11912
11913 #define vshrn_n_u16(a, b) \
11914 __extension__ \
11915 ({ \
11916 uint16x8_t a_ = (a); \
11917 uint8x8_t result; \
11918 __asm__ ("shrn %0.8b,%1.8h,%2" \
11919 : "=w"(result) \
11920 : "w"(a_), "i"(b) \
11921 : /* No clobbers */); \
11922 result; \
11923 })
11924
11925 #define vshrn_n_u32(a, b) \
11926 __extension__ \
11927 ({ \
11928 uint32x4_t a_ = (a); \
11929 uint16x4_t result; \
11930 __asm__ ("shrn %0.4h,%1.4s,%2" \
11931 : "=w"(result) \
11932 : "w"(a_), "i"(b) \
11933 : /* No clobbers */); \
11934 result; \
11935 })
11936
11937 #define vshrn_n_u64(a, b) \
11938 __extension__ \
11939 ({ \
11940 uint64x2_t a_ = (a); \
11941 uint32x2_t result; \
11942 __asm__ ("shrn %0.2s,%1.2d,%2" \
11943 : "=w"(result) \
11944 : "w"(a_), "i"(b) \
11945 : /* No clobbers */); \
11946 result; \
11947 })
11948
11949 #define vsli_n_p8(a, b, c) \
11950 __extension__ \
11951 ({ \
11952 poly8x8_t b_ = (b); \
11953 poly8x8_t a_ = (a); \
11954 poly8x8_t result; \
11955 __asm__ ("sli %0.8b,%2.8b,%3" \
11956 : "=w"(result) \
11957 : "0"(a_), "w"(b_), "i"(c) \
11958 : /* No clobbers */); \
11959 result; \
11960 })
11961
11962 #define vsli_n_p16(a, b, c) \
11963 __extension__ \
11964 ({ \
11965 poly16x4_t b_ = (b); \
11966 poly16x4_t a_ = (a); \
11967 poly16x4_t result; \
11968 __asm__ ("sli %0.4h,%2.4h,%3" \
11969 : "=w"(result) \
11970 : "0"(a_), "w"(b_), "i"(c) \
11971 : /* No clobbers */); \
11972 result; \
11973 })
11974
11975 #define vsliq_n_p8(a, b, c) \
11976 __extension__ \
11977 ({ \
11978 poly8x16_t b_ = (b); \
11979 poly8x16_t a_ = (a); \
11980 poly8x16_t result; \
11981 __asm__ ("sli %0.16b,%2.16b,%3" \
11982 : "=w"(result) \
11983 : "0"(a_), "w"(b_), "i"(c) \
11984 : /* No clobbers */); \
11985 result; \
11986 })
11987
11988 #define vsliq_n_p16(a, b, c) \
11989 __extension__ \
11990 ({ \
11991 poly16x8_t b_ = (b); \
11992 poly16x8_t a_ = (a); \
11993 poly16x8_t result; \
11994 __asm__ ("sli %0.8h,%2.8h,%3" \
11995 : "=w"(result) \
11996 : "0"(a_), "w"(b_), "i"(c) \
11997 : /* No clobbers */); \
11998 result; \
11999 })
12000
12001 #define vsri_n_p8(a, b, c) \
12002 __extension__ \
12003 ({ \
12004 poly8x8_t b_ = (b); \
12005 poly8x8_t a_ = (a); \
12006 poly8x8_t result; \
12007 __asm__ ("sri %0.8b,%2.8b,%3" \
12008 : "=w"(result) \
12009 : "0"(a_), "w"(b_), "i"(c) \
12010 : /* No clobbers */); \
12011 result; \
12012 })
12013
12014 #define vsri_n_p16(a, b, c) \
12015 __extension__ \
12016 ({ \
12017 poly16x4_t b_ = (b); \
12018 poly16x4_t a_ = (a); \
12019 poly16x4_t result; \
12020 __asm__ ("sri %0.4h,%2.4h,%3" \
12021 : "=w"(result) \
12022 : "0"(a_), "w"(b_), "i"(c) \
12023 : /* No clobbers */); \
12024 result; \
12025 })
12026
12027 #define vsriq_n_p8(a, b, c) \
12028 __extension__ \
12029 ({ \
12030 poly8x16_t b_ = (b); \
12031 poly8x16_t a_ = (a); \
12032 poly8x16_t result; \
12033 __asm__ ("sri %0.16b,%2.16b,%3" \
12034 : "=w"(result) \
12035 : "0"(a_), "w"(b_), "i"(c) \
12036 : /* No clobbers */); \
12037 result; \
12038 })
12039
12040 #define vsriq_n_p16(a, b, c) \
12041 __extension__ \
12042 ({ \
12043 poly16x8_t b_ = (b); \
12044 poly16x8_t a_ = (a); \
12045 poly16x8_t result; \
12046 __asm__ ("sri %0.8h,%2.8h,%3" \
12047 : "=w"(result) \
12048 : "0"(a_), "w"(b_), "i"(c) \
12049 : /* No clobbers */); \
12050 result; \
12051 })
12052
12053 #define vst1_lane_f32(a, b, c) \
12054 __extension__ \
12055 ({ \
12056 float32x2_t b_ = (b); \
12057 float32_t * a_ = (a); \
12058 __asm__ ("st1 {%1.s}[%2],[%0]" \
12059 : \
12060 : "r"(a_), "w"(b_), "i"(c) \
12061 : "memory"); \
12062 })
12063
12064 #define vst1_lane_f64(a, b, c) \
12065 __extension__ \
12066 ({ \
12067 float64x1_t b_ = (b); \
12068 float64_t * a_ = (a); \
12069 __asm__ ("st1 {%1.d}[%2],[%0]" \
12070 : \
12071 : "r"(a_), "w"(b_), "i"(c) \
12072 : "memory"); \
12073 })
12074
12075 #define vst1_lane_p8(a, b, c) \
12076 __extension__ \
12077 ({ \
12078 poly8x8_t b_ = (b); \
12079 poly8_t * a_ = (a); \
12080 __asm__ ("st1 {%1.b}[%2],[%0]" \
12081 : \
12082 : "r"(a_), "w"(b_), "i"(c) \
12083 : "memory"); \
12084 })
12085
12086 #define vst1_lane_p16(a, b, c) \
12087 __extension__ \
12088 ({ \
12089 poly16x4_t b_ = (b); \
12090 poly16_t * a_ = (a); \
12091 __asm__ ("st1 {%1.h}[%2],[%0]" \
12092 : \
12093 : "r"(a_), "w"(b_), "i"(c) \
12094 : "memory"); \
12095 })
12096
12097 #define vst1_lane_s8(a, b, c) \
12098 __extension__ \
12099 ({ \
12100 int8x8_t b_ = (b); \
12101 int8_t * a_ = (a); \
12102 __asm__ ("st1 {%1.b}[%2],[%0]" \
12103 : \
12104 : "r"(a_), "w"(b_), "i"(c) \
12105 : "memory"); \
12106 })
12107
12108 #define vst1_lane_s16(a, b, c) \
12109 __extension__ \
12110 ({ \
12111 int16x4_t b_ = (b); \
12112 int16_t * a_ = (a); \
12113 __asm__ ("st1 {%1.h}[%2],[%0]" \
12114 : \
12115 : "r"(a_), "w"(b_), "i"(c) \
12116 : "memory"); \
12117 })
12118
12119 #define vst1_lane_s32(a, b, c) \
12120 __extension__ \
12121 ({ \
12122 int32x2_t b_ = (b); \
12123 int32_t * a_ = (a); \
12124 __asm__ ("st1 {%1.s}[%2],[%0]" \
12125 : \
12126 : "r"(a_), "w"(b_), "i"(c) \
12127 : "memory"); \
12128 })
12129
12130 #define vst1_lane_s64(a, b, c) \
12131 __extension__ \
12132 ({ \
12133 int64x1_t b_ = (b); \
12134 int64_t * a_ = (a); \
12135 __asm__ ("st1 {%1.d}[%2],[%0]" \
12136 : \
12137 : "r"(a_), "w"(b_), "i"(c) \
12138 : "memory"); \
12139 })
12140
12141 #define vst1_lane_u8(a, b, c) \
12142 __extension__ \
12143 ({ \
12144 uint8x8_t b_ = (b); \
12145 uint8_t * a_ = (a); \
12146 __asm__ ("st1 {%1.b}[%2],[%0]" \
12147 : \
12148 : "r"(a_), "w"(b_), "i"(c) \
12149 : "memory"); \
12150 })
12151
12152 #define vst1_lane_u16(a, b, c) \
12153 __extension__ \
12154 ({ \
12155 uint16x4_t b_ = (b); \
12156 uint16_t * a_ = (a); \
12157 __asm__ ("st1 {%1.h}[%2],[%0]" \
12158 : \
12159 : "r"(a_), "w"(b_), "i"(c) \
12160 : "memory"); \
12161 })
12162
12163 #define vst1_lane_u32(a, b, c) \
12164 __extension__ \
12165 ({ \
12166 uint32x2_t b_ = (b); \
12167 uint32_t * a_ = (a); \
12168 __asm__ ("st1 {%1.s}[%2],[%0]" \
12169 : \
12170 : "r"(a_), "w"(b_), "i"(c) \
12171 : "memory"); \
12172 })
12173
12174 #define vst1_lane_u64(a, b, c) \
12175 __extension__ \
12176 ({ \
12177 uint64x1_t b_ = (b); \
12178 uint64_t * a_ = (a); \
12179 __asm__ ("st1 {%1.d}[%2],[%0]" \
12180 : \
12181 : "r"(a_), "w"(b_), "i"(c) \
12182 : "memory"); \
12183 })
12184
12185
12186 #define vst1q_lane_f32(a, b, c) \
12187 __extension__ \
12188 ({ \
12189 float32x4_t b_ = (b); \
12190 float32_t * a_ = (a); \
12191 __asm__ ("st1 {%1.s}[%2],[%0]" \
12192 : \
12193 : "r"(a_), "w"(b_), "i"(c) \
12194 : "memory"); \
12195 })
12196
12197 #define vst1q_lane_f64(a, b, c) \
12198 __extension__ \
12199 ({ \
12200 float64x2_t b_ = (b); \
12201 float64_t * a_ = (a); \
12202 __asm__ ("st1 {%1.d}[%2],[%0]" \
12203 : \
12204 : "r"(a_), "w"(b_), "i"(c) \
12205 : "memory"); \
12206 })
12207
12208 #define vst1q_lane_p8(a, b, c) \
12209 __extension__ \
12210 ({ \
12211 poly8x16_t b_ = (b); \
12212 poly8_t * a_ = (a); \
12213 __asm__ ("st1 {%1.b}[%2],[%0]" \
12214 : \
12215 : "r"(a_), "w"(b_), "i"(c) \
12216 : "memory"); \
12217 })
12218
12219 #define vst1q_lane_p16(a, b, c) \
12220 __extension__ \
12221 ({ \
12222 poly16x8_t b_ = (b); \
12223 poly16_t * a_ = (a); \
12224 __asm__ ("st1 {%1.h}[%2],[%0]" \
12225 : \
12226 : "r"(a_), "w"(b_), "i"(c) \
12227 : "memory"); \
12228 })
12229
12230 #define vst1q_lane_s8(a, b, c) \
12231 __extension__ \
12232 ({ \
12233 int8x16_t b_ = (b); \
12234 int8_t * a_ = (a); \
12235 __asm__ ("st1 {%1.b}[%2],[%0]" \
12236 : \
12237 : "r"(a_), "w"(b_), "i"(c) \
12238 : "memory"); \
12239 })
12240
12241 #define vst1q_lane_s16(a, b, c) \
12242 __extension__ \
12243 ({ \
12244 int16x8_t b_ = (b); \
12245 int16_t * a_ = (a); \
12246 __asm__ ("st1 {%1.h}[%2],[%0]" \
12247 : \
12248 : "r"(a_), "w"(b_), "i"(c) \
12249 : "memory"); \
12250 })
12251
12252 #define vst1q_lane_s32(a, b, c) \
12253 __extension__ \
12254 ({ \
12255 int32x4_t b_ = (b); \
12256 int32_t * a_ = (a); \
12257 __asm__ ("st1 {%1.s}[%2],[%0]" \
12258 : \
12259 : "r"(a_), "w"(b_), "i"(c) \
12260 : "memory"); \
12261 })
12262
12263 #define vst1q_lane_s64(a, b, c) \
12264 __extension__ \
12265 ({ \
12266 int64x2_t b_ = (b); \
12267 int64_t * a_ = (a); \
12268 __asm__ ("st1 {%1.d}[%2],[%0]" \
12269 : \
12270 : "r"(a_), "w"(b_), "i"(c) \
12271 : "memory"); \
12272 })
12273
12274 #define vst1q_lane_u8(a, b, c) \
12275 __extension__ \
12276 ({ \
12277 uint8x16_t b_ = (b); \
12278 uint8_t * a_ = (a); \
12279 __asm__ ("st1 {%1.b}[%2],[%0]" \
12280 : \
12281 : "r"(a_), "w"(b_), "i"(c) \
12282 : "memory"); \
12283 })
12284
12285 #define vst1q_lane_u16(a, b, c) \
12286 __extension__ \
12287 ({ \
12288 uint16x8_t b_ = (b); \
12289 uint16_t * a_ = (a); \
12290 __asm__ ("st1 {%1.h}[%2],[%0]" \
12291 : \
12292 : "r"(a_), "w"(b_), "i"(c) \
12293 : "memory"); \
12294 })
12295
12296 #define vst1q_lane_u32(a, b, c) \
12297 __extension__ \
12298 ({ \
12299 uint32x4_t b_ = (b); \
12300 uint32_t * a_ = (a); \
12301 __asm__ ("st1 {%1.s}[%2],[%0]" \
12302 : \
12303 : "r"(a_), "w"(b_), "i"(c) \
12304 : "memory"); \
12305 })
12306
12307 #define vst1q_lane_u64(a, b, c) \
12308 __extension__ \
12309 ({ \
12310 uint64x2_t b_ = (b); \
12311 uint64_t * a_ = (a); \
12312 __asm__ ("st1 {%1.d}[%2],[%0]" \
12313 : \
12314 : "r"(a_), "w"(b_), "i"(c) \
12315 : "memory"); \
12316 })
12317
12318 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12319 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
12320 {
12321 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
12322 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12323 : "+w"(result)
12324 : "w"(b), "w"(c)
12325 : /* No clobbers */);
12326 return result;
12327 }
12328
12329 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12330 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
12331 {
12332 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
12333 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12334 : "+w"(result)
12335 : "w"(b), "w"(c)
12336 : /* No clobbers */);
12337 return result;
12338 }
12339
12340 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12341 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
12342 {
12343 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
12344 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12345 : "+w"(result)
12346 : "w"(b), "w"(c)
12347 : /* No clobbers */);
12348 return result;
12349 }
12350
12351 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12352 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
12353 {
12354 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
12355 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12356 : "+w"(result)
12357 : "w"(b), "w"(c)
12358 : /* No clobbers */);
12359 return result;
12360 }
12361
12362 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12363 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
12364 {
12365 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
12366 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12367 : "+w"(result)
12368 : "w"(b), "w"(c)
12369 : /* No clobbers */);
12370 return result;
12371 }
12372
12373 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12374 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
12375 {
12376 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
12377 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12378 : "+w"(result)
12379 : "w"(b), "w"(c)
12380 : /* No clobbers */);
12381 return result;
12382 }
12383
12384 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12385 vsubhn_s16 (int16x8_t a, int16x8_t b)
12386 {
12387 int8x8_t result;
12388 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12389 : "=w"(result)
12390 : "w"(a), "w"(b)
12391 : /* No clobbers */);
12392 return result;
12393 }
12394
12395 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12396 vsubhn_s32 (int32x4_t a, int32x4_t b)
12397 {
12398 int16x4_t result;
12399 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12400 : "=w"(result)
12401 : "w"(a), "w"(b)
12402 : /* No clobbers */);
12403 return result;
12404 }
12405
12406 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12407 vsubhn_s64 (int64x2_t a, int64x2_t b)
12408 {
12409 int32x2_t result;
12410 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12411 : "=w"(result)
12412 : "w"(a), "w"(b)
12413 : /* No clobbers */);
12414 return result;
12415 }
12416
12417 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12418 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
12419 {
12420 uint8x8_t result;
12421 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12422 : "=w"(result)
12423 : "w"(a), "w"(b)
12424 : /* No clobbers */);
12425 return result;
12426 }
12427
12428 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12429 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
12430 {
12431 uint16x4_t result;
12432 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12433 : "=w"(result)
12434 : "w"(a), "w"(b)
12435 : /* No clobbers */);
12436 return result;
12437 }
12438
12439 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12440 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
12441 {
12442 uint32x2_t result;
12443 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12444 : "=w"(result)
12445 : "w"(a), "w"(b)
12446 : /* No clobbers */);
12447 return result;
12448 }
12449
12450 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12451 vtrn1_f32 (float32x2_t a, float32x2_t b)
12452 {
12453 float32x2_t result;
12454 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12455 : "=w"(result)
12456 : "w"(a), "w"(b)
12457 : /* No clobbers */);
12458 return result;
12459 }
12460
12461 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12462 vtrn1_p8 (poly8x8_t a, poly8x8_t b)
12463 {
12464 poly8x8_t result;
12465 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12466 : "=w"(result)
12467 : "w"(a), "w"(b)
12468 : /* No clobbers */);
12469 return result;
12470 }
12471
12472 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12473 vtrn1_p16 (poly16x4_t a, poly16x4_t b)
12474 {
12475 poly16x4_t result;
12476 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12477 : "=w"(result)
12478 : "w"(a), "w"(b)
12479 : /* No clobbers */);
12480 return result;
12481 }
12482
12483 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12484 vtrn1_s8 (int8x8_t a, int8x8_t b)
12485 {
12486 int8x8_t result;
12487 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12488 : "=w"(result)
12489 : "w"(a), "w"(b)
12490 : /* No clobbers */);
12491 return result;
12492 }
12493
12494 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12495 vtrn1_s16 (int16x4_t a, int16x4_t b)
12496 {
12497 int16x4_t result;
12498 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12499 : "=w"(result)
12500 : "w"(a), "w"(b)
12501 : /* No clobbers */);
12502 return result;
12503 }
12504
12505 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12506 vtrn1_s32 (int32x2_t a, int32x2_t b)
12507 {
12508 int32x2_t result;
12509 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12510 : "=w"(result)
12511 : "w"(a), "w"(b)
12512 : /* No clobbers */);
12513 return result;
12514 }
12515
12516 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12517 vtrn1_u8 (uint8x8_t a, uint8x8_t b)
12518 {
12519 uint8x8_t result;
12520 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12521 : "=w"(result)
12522 : "w"(a), "w"(b)
12523 : /* No clobbers */);
12524 return result;
12525 }
12526
12527 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12528 vtrn1_u16 (uint16x4_t a, uint16x4_t b)
12529 {
12530 uint16x4_t result;
12531 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12532 : "=w"(result)
12533 : "w"(a), "w"(b)
12534 : /* No clobbers */);
12535 return result;
12536 }
12537
12538 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12539 vtrn1_u32 (uint32x2_t a, uint32x2_t b)
12540 {
12541 uint32x2_t result;
12542 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12543 : "=w"(result)
12544 : "w"(a), "w"(b)
12545 : /* No clobbers */);
12546 return result;
12547 }
12548
12549 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12550 vtrn1q_f32 (float32x4_t a, float32x4_t b)
12551 {
12552 float32x4_t result;
12553 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12554 : "=w"(result)
12555 : "w"(a), "w"(b)
12556 : /* No clobbers */);
12557 return result;
12558 }
12559
12560 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12561 vtrn1q_f64 (float64x2_t a, float64x2_t b)
12562 {
12563 float64x2_t result;
12564 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12565 : "=w"(result)
12566 : "w"(a), "w"(b)
12567 : /* No clobbers */);
12568 return result;
12569 }
12570
12571 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12572 vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
12573 {
12574 poly8x16_t result;
12575 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12576 : "=w"(result)
12577 : "w"(a), "w"(b)
12578 : /* No clobbers */);
12579 return result;
12580 }
12581
12582 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12583 vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
12584 {
12585 poly16x8_t result;
12586 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12587 : "=w"(result)
12588 : "w"(a), "w"(b)
12589 : /* No clobbers */);
12590 return result;
12591 }
12592
12593 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12594 vtrn1q_s8 (int8x16_t a, int8x16_t b)
12595 {
12596 int8x16_t result;
12597 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12598 : "=w"(result)
12599 : "w"(a), "w"(b)
12600 : /* No clobbers */);
12601 return result;
12602 }
12603
12604 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12605 vtrn1q_s16 (int16x8_t a, int16x8_t b)
12606 {
12607 int16x8_t result;
12608 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12609 : "=w"(result)
12610 : "w"(a), "w"(b)
12611 : /* No clobbers */);
12612 return result;
12613 }
12614
12615 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12616 vtrn1q_s32 (int32x4_t a, int32x4_t b)
12617 {
12618 int32x4_t result;
12619 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12620 : "=w"(result)
12621 : "w"(a), "w"(b)
12622 : /* No clobbers */);
12623 return result;
12624 }
12625
12626 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12627 vtrn1q_s64 (int64x2_t a, int64x2_t b)
12628 {
12629 int64x2_t result;
12630 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12631 : "=w"(result)
12632 : "w"(a), "w"(b)
12633 : /* No clobbers */);
12634 return result;
12635 }
12636
12637 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12638 vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
12639 {
12640 uint8x16_t result;
12641 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12642 : "=w"(result)
12643 : "w"(a), "w"(b)
12644 : /* No clobbers */);
12645 return result;
12646 }
12647
12648 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12649 vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
12650 {
12651 uint16x8_t result;
12652 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12653 : "=w"(result)
12654 : "w"(a), "w"(b)
12655 : /* No clobbers */);
12656 return result;
12657 }
12658
12659 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12660 vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
12661 {
12662 uint32x4_t result;
12663 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12664 : "=w"(result)
12665 : "w"(a), "w"(b)
12666 : /* No clobbers */);
12667 return result;
12668 }
12669
12670 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12671 vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
12672 {
12673 uint64x2_t result;
12674 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12675 : "=w"(result)
12676 : "w"(a), "w"(b)
12677 : /* No clobbers */);
12678 return result;
12679 }
12680
12681 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12682 vtrn2_f32 (float32x2_t a, float32x2_t b)
12683 {
12684 float32x2_t result;
12685 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12686 : "=w"(result)
12687 : "w"(a), "w"(b)
12688 : /* No clobbers */);
12689 return result;
12690 }
12691
12692 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12693 vtrn2_p8 (poly8x8_t a, poly8x8_t b)
12694 {
12695 poly8x8_t result;
12696 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12697 : "=w"(result)
12698 : "w"(a), "w"(b)
12699 : /* No clobbers */);
12700 return result;
12701 }
12702
12703 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12704 vtrn2_p16 (poly16x4_t a, poly16x4_t b)
12705 {
12706 poly16x4_t result;
12707 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12708 : "=w"(result)
12709 : "w"(a), "w"(b)
12710 : /* No clobbers */);
12711 return result;
12712 }
12713
12714 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12715 vtrn2_s8 (int8x8_t a, int8x8_t b)
12716 {
12717 int8x8_t result;
12718 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12719 : "=w"(result)
12720 : "w"(a), "w"(b)
12721 : /* No clobbers */);
12722 return result;
12723 }
12724
12725 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12726 vtrn2_s16 (int16x4_t a, int16x4_t b)
12727 {
12728 int16x4_t result;
12729 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12730 : "=w"(result)
12731 : "w"(a), "w"(b)
12732 : /* No clobbers */);
12733 return result;
12734 }
12735
12736 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12737 vtrn2_s32 (int32x2_t a, int32x2_t b)
12738 {
12739 int32x2_t result;
12740 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12741 : "=w"(result)
12742 : "w"(a), "w"(b)
12743 : /* No clobbers */);
12744 return result;
12745 }
12746
12747 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12748 vtrn2_u8 (uint8x8_t a, uint8x8_t b)
12749 {
12750 uint8x8_t result;
12751 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12752 : "=w"(result)
12753 : "w"(a), "w"(b)
12754 : /* No clobbers */);
12755 return result;
12756 }
12757
12758 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12759 vtrn2_u16 (uint16x4_t a, uint16x4_t b)
12760 {
12761 uint16x4_t result;
12762 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12763 : "=w"(result)
12764 : "w"(a), "w"(b)
12765 : /* No clobbers */);
12766 return result;
12767 }
12768
12769 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12770 vtrn2_u32 (uint32x2_t a, uint32x2_t b)
12771 {
12772 uint32x2_t result;
12773 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12774 : "=w"(result)
12775 : "w"(a), "w"(b)
12776 : /* No clobbers */);
12777 return result;
12778 }
12779
12780 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12781 vtrn2q_f32 (float32x4_t a, float32x4_t b)
12782 {
12783 float32x4_t result;
12784 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
12785 : "=w"(result)
12786 : "w"(a), "w"(b)
12787 : /* No clobbers */);
12788 return result;
12789 }
12790
12791 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12792 vtrn2q_f64 (float64x2_t a, float64x2_t b)
12793 {
12794 float64x2_t result;
12795 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
12796 : "=w"(result)
12797 : "w"(a), "w"(b)
12798 : /* No clobbers */);
12799 return result;
12800 }
12801
12802 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12803 vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
12804 {
12805 poly8x16_t result;
12806 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
12807 : "=w"(result)
12808 : "w"(a), "w"(b)
12809 : /* No clobbers */);
12810 return result;
12811 }
12812
12813 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12814 vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
12815 {
12816 poly16x8_t result;
12817 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
12818 : "=w"(result)
12819 : "w"(a), "w"(b)
12820 : /* No clobbers */);
12821 return result;
12822 }
12823
12824 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12825 vtrn2q_s8 (int8x16_t a, int8x16_t b)
12826 {
12827 int8x16_t result;
12828 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
12829 : "=w"(result)
12830 : "w"(a), "w"(b)
12831 : /* No clobbers */);
12832 return result;
12833 }
12834
12835 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12836 vtrn2q_s16 (int16x8_t a, int16x8_t b)
12837 {
12838 int16x8_t result;
12839 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
12840 : "=w"(result)
12841 : "w"(a), "w"(b)
12842 : /* No clobbers */);
12843 return result;
12844 }
12845
12846 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12847 vtrn2q_s32 (int32x4_t a, int32x4_t b)
12848 {
12849 int32x4_t result;
12850 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
12851 : "=w"(result)
12852 : "w"(a), "w"(b)
12853 : /* No clobbers */);
12854 return result;
12855 }
12856
12857 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12858 vtrn2q_s64 (int64x2_t a, int64x2_t b)
12859 {
12860 int64x2_t result;
12861 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
12862 : "=w"(result)
12863 : "w"(a), "w"(b)
12864 : /* No clobbers */);
12865 return result;
12866 }
12867
12868 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12869 vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
12870 {
12871 uint8x16_t result;
12872 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
12873 : "=w"(result)
12874 : "w"(a), "w"(b)
12875 : /* No clobbers */);
12876 return result;
12877 }
12878
12879 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12880 vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
12881 {
12882 uint16x8_t result;
12883 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
12884 : "=w"(result)
12885 : "w"(a), "w"(b)
12886 : /* No clobbers */);
12887 return result;
12888 }
12889
12890 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12891 vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
12892 {
12893 uint32x4_t result;
12894 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
12895 : "=w"(result)
12896 : "w"(a), "w"(b)
12897 : /* No clobbers */);
12898 return result;
12899 }
12900
12901 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12902 vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
12903 {
12904 uint64x2_t result;
12905 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
12906 : "=w"(result)
12907 : "w"(a), "w"(b)
12908 : /* No clobbers */);
12909 return result;
12910 }
12911
12912 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12913 vtst_p8 (poly8x8_t a, poly8x8_t b)
12914 {
12915 uint8x8_t result;
12916 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
12917 : "=w"(result)
12918 : "w"(a), "w"(b)
12919 : /* No clobbers */);
12920 return result;
12921 }
12922
12923 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12924 vtst_p16 (poly16x4_t a, poly16x4_t b)
12925 {
12926 uint16x4_t result;
12927 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
12928 : "=w"(result)
12929 : "w"(a), "w"(b)
12930 : /* No clobbers */);
12931 return result;
12932 }
12933
12934 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12935 vtstq_p8 (poly8x16_t a, poly8x16_t b)
12936 {
12937 uint8x16_t result;
12938 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
12939 : "=w"(result)
12940 : "w"(a), "w"(b)
12941 : /* No clobbers */);
12942 return result;
12943 }
12944
12945 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12946 vtstq_p16 (poly16x8_t a, poly16x8_t b)
12947 {
12948 uint16x8_t result;
12949 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
12950 : "=w"(result)
12951 : "w"(a), "w"(b)
12952 : /* No clobbers */);
12953 return result;
12954 }
12955 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12956 vuzp1_f32 (float32x2_t a, float32x2_t b)
12957 {
12958 float32x2_t result;
12959 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
12960 : "=w"(result)
12961 : "w"(a), "w"(b)
12962 : /* No clobbers */);
12963 return result;
12964 }
12965
12966 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12967 vuzp1_p8 (poly8x8_t a, poly8x8_t b)
12968 {
12969 poly8x8_t result;
12970 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
12971 : "=w"(result)
12972 : "w"(a), "w"(b)
12973 : /* No clobbers */);
12974 return result;
12975 }
12976
12977 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12978 vuzp1_p16 (poly16x4_t a, poly16x4_t b)
12979 {
12980 poly16x4_t result;
12981 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
12982 : "=w"(result)
12983 : "w"(a), "w"(b)
12984 : /* No clobbers */);
12985 return result;
12986 }
12987
12988 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12989 vuzp1_s8 (int8x8_t a, int8x8_t b)
12990 {
12991 int8x8_t result;
12992 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
12993 : "=w"(result)
12994 : "w"(a), "w"(b)
12995 : /* No clobbers */);
12996 return result;
12997 }
12998
12999 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13000 vuzp1_s16 (int16x4_t a, int16x4_t b)
13001 {
13002 int16x4_t result;
13003 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13004 : "=w"(result)
13005 : "w"(a), "w"(b)
13006 : /* No clobbers */);
13007 return result;
13008 }
13009
13010 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13011 vuzp1_s32 (int32x2_t a, int32x2_t b)
13012 {
13013 int32x2_t result;
13014 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13015 : "=w"(result)
13016 : "w"(a), "w"(b)
13017 : /* No clobbers */);
13018 return result;
13019 }
13020
13021 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13022 vuzp1_u8 (uint8x8_t a, uint8x8_t b)
13023 {
13024 uint8x8_t result;
13025 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
13026 : "=w"(result)
13027 : "w"(a), "w"(b)
13028 : /* No clobbers */);
13029 return result;
13030 }
13031
13032 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13033 vuzp1_u16 (uint16x4_t a, uint16x4_t b)
13034 {
13035 uint16x4_t result;
13036 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13037 : "=w"(result)
13038 : "w"(a), "w"(b)
13039 : /* No clobbers */);
13040 return result;
13041 }
13042
13043 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13044 vuzp1_u32 (uint32x2_t a, uint32x2_t b)
13045 {
13046 uint32x2_t result;
13047 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13048 : "=w"(result)
13049 : "w"(a), "w"(b)
13050 : /* No clobbers */);
13051 return result;
13052 }
13053
13054 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13055 vuzp1q_f32 (float32x4_t a, float32x4_t b)
13056 {
13057 float32x4_t result;
13058 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13059 : "=w"(result)
13060 : "w"(a), "w"(b)
13061 : /* No clobbers */);
13062 return result;
13063 }
13064
13065 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13066 vuzp1q_f64 (float64x2_t a, float64x2_t b)
13067 {
13068 float64x2_t result;
13069 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13070 : "=w"(result)
13071 : "w"(a), "w"(b)
13072 : /* No clobbers */);
13073 return result;
13074 }
13075
13076 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13077 vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
13078 {
13079 poly8x16_t result;
13080 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13081 : "=w"(result)
13082 : "w"(a), "w"(b)
13083 : /* No clobbers */);
13084 return result;
13085 }
13086
13087 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13088 vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
13089 {
13090 poly16x8_t result;
13091 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13092 : "=w"(result)
13093 : "w"(a), "w"(b)
13094 : /* No clobbers */);
13095 return result;
13096 }
13097
13098 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13099 vuzp1q_s8 (int8x16_t a, int8x16_t b)
13100 {
13101 int8x16_t result;
13102 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13103 : "=w"(result)
13104 : "w"(a), "w"(b)
13105 : /* No clobbers */);
13106 return result;
13107 }
13108
13109 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13110 vuzp1q_s16 (int16x8_t a, int16x8_t b)
13111 {
13112 int16x8_t result;
13113 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13114 : "=w"(result)
13115 : "w"(a), "w"(b)
13116 : /* No clobbers */);
13117 return result;
13118 }
13119
13120 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13121 vuzp1q_s32 (int32x4_t a, int32x4_t b)
13122 {
13123 int32x4_t result;
13124 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13125 : "=w"(result)
13126 : "w"(a), "w"(b)
13127 : /* No clobbers */);
13128 return result;
13129 }
13130
13131 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13132 vuzp1q_s64 (int64x2_t a, int64x2_t b)
13133 {
13134 int64x2_t result;
13135 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13136 : "=w"(result)
13137 : "w"(a), "w"(b)
13138 : /* No clobbers */);
13139 return result;
13140 }
13141
13142 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13143 vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
13144 {
13145 uint8x16_t result;
13146 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13147 : "=w"(result)
13148 : "w"(a), "w"(b)
13149 : /* No clobbers */);
13150 return result;
13151 }
13152
13153 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13154 vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
13155 {
13156 uint16x8_t result;
13157 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13158 : "=w"(result)
13159 : "w"(a), "w"(b)
13160 : /* No clobbers */);
13161 return result;
13162 }
13163
13164 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13165 vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
13166 {
13167 uint32x4_t result;
13168 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13169 : "=w"(result)
13170 : "w"(a), "w"(b)
13171 : /* No clobbers */);
13172 return result;
13173 }
13174
13175 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13176 vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
13177 {
13178 uint64x2_t result;
13179 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13180 : "=w"(result)
13181 : "w"(a), "w"(b)
13182 : /* No clobbers */);
13183 return result;
13184 }
13185
13186 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13187 vuzp2_f32 (float32x2_t a, float32x2_t b)
13188 {
13189 float32x2_t result;
13190 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13191 : "=w"(result)
13192 : "w"(a), "w"(b)
13193 : /* No clobbers */);
13194 return result;
13195 }
13196
13197 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13198 vuzp2_p8 (poly8x8_t a, poly8x8_t b)
13199 {
13200 poly8x8_t result;
13201 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13202 : "=w"(result)
13203 : "w"(a), "w"(b)
13204 : /* No clobbers */);
13205 return result;
13206 }
13207
13208 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13209 vuzp2_p16 (poly16x4_t a, poly16x4_t b)
13210 {
13211 poly16x4_t result;
13212 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13213 : "=w"(result)
13214 : "w"(a), "w"(b)
13215 : /* No clobbers */);
13216 return result;
13217 }
13218
13219 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13220 vuzp2_s8 (int8x8_t a, int8x8_t b)
13221 {
13222 int8x8_t result;
13223 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13224 : "=w"(result)
13225 : "w"(a), "w"(b)
13226 : /* No clobbers */);
13227 return result;
13228 }
13229
13230 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13231 vuzp2_s16 (int16x4_t a, int16x4_t b)
13232 {
13233 int16x4_t result;
13234 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13235 : "=w"(result)
13236 : "w"(a), "w"(b)
13237 : /* No clobbers */);
13238 return result;
13239 }
13240
13241 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13242 vuzp2_s32 (int32x2_t a, int32x2_t b)
13243 {
13244 int32x2_t result;
13245 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13246 : "=w"(result)
13247 : "w"(a), "w"(b)
13248 : /* No clobbers */);
13249 return result;
13250 }
13251
13252 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13253 vuzp2_u8 (uint8x8_t a, uint8x8_t b)
13254 {
13255 uint8x8_t result;
13256 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13257 : "=w"(result)
13258 : "w"(a), "w"(b)
13259 : /* No clobbers */);
13260 return result;
13261 }
13262
13263 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13264 vuzp2_u16 (uint16x4_t a, uint16x4_t b)
13265 {
13266 uint16x4_t result;
13267 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13268 : "=w"(result)
13269 : "w"(a), "w"(b)
13270 : /* No clobbers */);
13271 return result;
13272 }
13273
13274 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13275 vuzp2_u32 (uint32x2_t a, uint32x2_t b)
13276 {
13277 uint32x2_t result;
13278 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13279 : "=w"(result)
13280 : "w"(a), "w"(b)
13281 : /* No clobbers */);
13282 return result;
13283 }
13284
13285 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13286 vuzp2q_f32 (float32x4_t a, float32x4_t b)
13287 {
13288 float32x4_t result;
13289 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13290 : "=w"(result)
13291 : "w"(a), "w"(b)
13292 : /* No clobbers */);
13293 return result;
13294 }
13295
13296 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13297 vuzp2q_f64 (float64x2_t a, float64x2_t b)
13298 {
13299 float64x2_t result;
13300 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13301 : "=w"(result)
13302 : "w"(a), "w"(b)
13303 : /* No clobbers */);
13304 return result;
13305 }
13306
13307 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13308 vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
13309 {
13310 poly8x16_t result;
13311 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13312 : "=w"(result)
13313 : "w"(a), "w"(b)
13314 : /* No clobbers */);
13315 return result;
13316 }
13317
13318 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13319 vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
13320 {
13321 poly16x8_t result;
13322 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13323 : "=w"(result)
13324 : "w"(a), "w"(b)
13325 : /* No clobbers */);
13326 return result;
13327 }
13328
13329 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13330 vuzp2q_s8 (int8x16_t a, int8x16_t b)
13331 {
13332 int8x16_t result;
13333 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13334 : "=w"(result)
13335 : "w"(a), "w"(b)
13336 : /* No clobbers */);
13337 return result;
13338 }
13339
13340 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13341 vuzp2q_s16 (int16x8_t a, int16x8_t b)
13342 {
13343 int16x8_t result;
13344 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13345 : "=w"(result)
13346 : "w"(a), "w"(b)
13347 : /* No clobbers */);
13348 return result;
13349 }
13350
13351 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13352 vuzp2q_s32 (int32x4_t a, int32x4_t b)
13353 {
13354 int32x4_t result;
13355 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13356 : "=w"(result)
13357 : "w"(a), "w"(b)
13358 : /* No clobbers */);
13359 return result;
13360 }
13361
13362 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13363 vuzp2q_s64 (int64x2_t a, int64x2_t b)
13364 {
13365 int64x2_t result;
13366 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13367 : "=w"(result)
13368 : "w"(a), "w"(b)
13369 : /* No clobbers */);
13370 return result;
13371 }
13372
13373 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13374 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
13375 {
13376 uint8x16_t result;
13377 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13378 : "=w"(result)
13379 : "w"(a), "w"(b)
13380 : /* No clobbers */);
13381 return result;
13382 }
13383
13384 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13385 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
13386 {
13387 uint16x8_t result;
13388 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13389 : "=w"(result)
13390 : "w"(a), "w"(b)
13391 : /* No clobbers */);
13392 return result;
13393 }
13394
13395 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13396 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
13397 {
13398 uint32x4_t result;
13399 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13400 : "=w"(result)
13401 : "w"(a), "w"(b)
13402 : /* No clobbers */);
13403 return result;
13404 }
13405
13406 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13407 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
13408 {
13409 uint64x2_t result;
13410 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13411 : "=w"(result)
13412 : "w"(a), "w"(b)
13413 : /* No clobbers */);
13414 return result;
13415 }
13416
13417 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13418 vzip1_f32 (float32x2_t a, float32x2_t b)
13419 {
13420 float32x2_t result;
13421 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13422 : "=w"(result)
13423 : "w"(a), "w"(b)
13424 : /* No clobbers */);
13425 return result;
13426 }
13427
13428 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13429 vzip1_p8 (poly8x8_t a, poly8x8_t b)
13430 {
13431 poly8x8_t result;
13432 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13433 : "=w"(result)
13434 : "w"(a), "w"(b)
13435 : /* No clobbers */);
13436 return result;
13437 }
13438
13439 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13440 vzip1_p16 (poly16x4_t a, poly16x4_t b)
13441 {
13442 poly16x4_t result;
13443 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13444 : "=w"(result)
13445 : "w"(a), "w"(b)
13446 : /* No clobbers */);
13447 return result;
13448 }
13449
13450 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13451 vzip1_s8 (int8x8_t a, int8x8_t b)
13452 {
13453 int8x8_t result;
13454 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13455 : "=w"(result)
13456 : "w"(a), "w"(b)
13457 : /* No clobbers */);
13458 return result;
13459 }
13460
13461 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13462 vzip1_s16 (int16x4_t a, int16x4_t b)
13463 {
13464 int16x4_t result;
13465 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13466 : "=w"(result)
13467 : "w"(a), "w"(b)
13468 : /* No clobbers */);
13469 return result;
13470 }
13471
13472 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13473 vzip1_s32 (int32x2_t a, int32x2_t b)
13474 {
13475 int32x2_t result;
13476 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13477 : "=w"(result)
13478 : "w"(a), "w"(b)
13479 : /* No clobbers */);
13480 return result;
13481 }
13482
13483 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13484 vzip1_u8 (uint8x8_t a, uint8x8_t b)
13485 {
13486 uint8x8_t result;
13487 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13488 : "=w"(result)
13489 : "w"(a), "w"(b)
13490 : /* No clobbers */);
13491 return result;
13492 }
13493
13494 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13495 vzip1_u16 (uint16x4_t a, uint16x4_t b)
13496 {
13497 uint16x4_t result;
13498 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13499 : "=w"(result)
13500 : "w"(a), "w"(b)
13501 : /* No clobbers */);
13502 return result;
13503 }
13504
13505 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13506 vzip1_u32 (uint32x2_t a, uint32x2_t b)
13507 {
13508 uint32x2_t result;
13509 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13510 : "=w"(result)
13511 : "w"(a), "w"(b)
13512 : /* No clobbers */);
13513 return result;
13514 }
13515
13516 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13517 vzip1q_f32 (float32x4_t a, float32x4_t b)
13518 {
13519 float32x4_t result;
13520 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
13521 : "=w"(result)
13522 : "w"(a), "w"(b)
13523 : /* No clobbers */);
13524 return result;
13525 }
13526
13527 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13528 vzip1q_f64 (float64x2_t a, float64x2_t b)
13529 {
13530 float64x2_t result;
13531 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
13532 : "=w"(result)
13533 : "w"(a), "w"(b)
13534 : /* No clobbers */);
13535 return result;
13536 }
13537
13538 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13539 vzip1q_p8 (poly8x16_t a, poly8x16_t b)
13540 {
13541 poly8x16_t result;
13542 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
13543 : "=w"(result)
13544 : "w"(a), "w"(b)
13545 : /* No clobbers */);
13546 return result;
13547 }
13548
13549 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13550 vzip1q_p16 (poly16x8_t a, poly16x8_t b)
13551 {
13552 poly16x8_t result;
13553 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
13554 : "=w"(result)
13555 : "w"(a), "w"(b)
13556 : /* No clobbers */);
13557 return result;
13558 }
13559
13560 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13561 vzip1q_s8 (int8x16_t a, int8x16_t b)
13562 {
13563 int8x16_t result;
13564 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
13565 : "=w"(result)
13566 : "w"(a), "w"(b)
13567 : /* No clobbers */);
13568 return result;
13569 }
13570
13571 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13572 vzip1q_s16 (int16x8_t a, int16x8_t b)
13573 {
13574 int16x8_t result;
13575 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
13576 : "=w"(result)
13577 : "w"(a), "w"(b)
13578 : /* No clobbers */);
13579 return result;
13580 }
13581
13582 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13583 vzip1q_s32 (int32x4_t a, int32x4_t b)
13584 {
13585 int32x4_t result;
13586 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
13587 : "=w"(result)
13588 : "w"(a), "w"(b)
13589 : /* No clobbers */);
13590 return result;
13591 }
13592
13593 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13594 vzip1q_s64 (int64x2_t a, int64x2_t b)
13595 {
13596 int64x2_t result;
13597 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
13598 : "=w"(result)
13599 : "w"(a), "w"(b)
13600 : /* No clobbers */);
13601 return result;
13602 }
13603
13604 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13605 vzip1q_u8 (uint8x16_t a, uint8x16_t b)
13606 {
13607 uint8x16_t result;
13608 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
13609 : "=w"(result)
13610 : "w"(a), "w"(b)
13611 : /* No clobbers */);
13612 return result;
13613 }
13614
13615 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13616 vzip1q_u16 (uint16x8_t a, uint16x8_t b)
13617 {
13618 uint16x8_t result;
13619 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
13620 : "=w"(result)
13621 : "w"(a), "w"(b)
13622 : /* No clobbers */);
13623 return result;
13624 }
13625
13626 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13627 vzip1q_u32 (uint32x4_t a, uint32x4_t b)
13628 {
13629 uint32x4_t result;
13630 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
13631 : "=w"(result)
13632 : "w"(a), "w"(b)
13633 : /* No clobbers */);
13634 return result;
13635 }
13636
13637 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13638 vzip1q_u64 (uint64x2_t a, uint64x2_t b)
13639 {
13640 uint64x2_t result;
13641 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
13642 : "=w"(result)
13643 : "w"(a), "w"(b)
13644 : /* No clobbers */);
13645 return result;
13646 }
13647
13648 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13649 vzip2_f32 (float32x2_t a, float32x2_t b)
13650 {
13651 float32x2_t result;
13652 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
13653 : "=w"(result)
13654 : "w"(a), "w"(b)
13655 : /* No clobbers */);
13656 return result;
13657 }
13658
13659 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13660 vzip2_p8 (poly8x8_t a, poly8x8_t b)
13661 {
13662 poly8x8_t result;
13663 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
13664 : "=w"(result)
13665 : "w"(a), "w"(b)
13666 : /* No clobbers */);
13667 return result;
13668 }
13669
13670 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13671 vzip2_p16 (poly16x4_t a, poly16x4_t b)
13672 {
13673 poly16x4_t result;
13674 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
13675 : "=w"(result)
13676 : "w"(a), "w"(b)
13677 : /* No clobbers */);
13678 return result;
13679 }
13680
13681 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13682 vzip2_s8 (int8x8_t a, int8x8_t b)
13683 {
13684 int8x8_t result;
13685 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
13686 : "=w"(result)
13687 : "w"(a), "w"(b)
13688 : /* No clobbers */);
13689 return result;
13690 }
13691
13692 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13693 vzip2_s16 (int16x4_t a, int16x4_t b)
13694 {
13695 int16x4_t result;
13696 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
13697 : "=w"(result)
13698 : "w"(a), "w"(b)
13699 : /* No clobbers */);
13700 return result;
13701 }
13702
13703 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13704 vzip2_s32 (int32x2_t a, int32x2_t b)
13705 {
13706 int32x2_t result;
13707 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
13708 : "=w"(result)
13709 : "w"(a), "w"(b)
13710 : /* No clobbers */);
13711 return result;
13712 }
13713
13714 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13715 vzip2_u8 (uint8x8_t a, uint8x8_t b)
13716 {
13717 uint8x8_t result;
13718 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
13719 : "=w"(result)
13720 : "w"(a), "w"(b)
13721 : /* No clobbers */);
13722 return result;
13723 }
13724
13725 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13726 vzip2_u16 (uint16x4_t a, uint16x4_t b)
13727 {
13728 uint16x4_t result;
13729 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
13730 : "=w"(result)
13731 : "w"(a), "w"(b)
13732 : /* No clobbers */);
13733 return result;
13734 }
13735
13736 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13737 vzip2_u32 (uint32x2_t a, uint32x2_t b)
13738 {
13739 uint32x2_t result;
13740 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
13741 : "=w"(result)
13742 : "w"(a), "w"(b)
13743 : /* No clobbers */);
13744 return result;
13745 }
13746
13747 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13748 vzip2q_f32 (float32x4_t a, float32x4_t b)
13749 {
13750 float32x4_t result;
13751 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
13752 : "=w"(result)
13753 : "w"(a), "w"(b)
13754 : /* No clobbers */);
13755 return result;
13756 }
13757
13758 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13759 vzip2q_f64 (float64x2_t a, float64x2_t b)
13760 {
13761 float64x2_t result;
13762 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
13763 : "=w"(result)
13764 : "w"(a), "w"(b)
13765 : /* No clobbers */);
13766 return result;
13767 }
13768
13769 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13770 vzip2q_p8 (poly8x16_t a, poly8x16_t b)
13771 {
13772 poly8x16_t result;
13773 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
13774 : "=w"(result)
13775 : "w"(a), "w"(b)
13776 : /* No clobbers */);
13777 return result;
13778 }
13779
13780 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13781 vzip2q_p16 (poly16x8_t a, poly16x8_t b)
13782 {
13783 poly16x8_t result;
13784 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
13785 : "=w"(result)
13786 : "w"(a), "w"(b)
13787 : /* No clobbers */);
13788 return result;
13789 }
13790
13791 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13792 vzip2q_s8 (int8x16_t a, int8x16_t b)
13793 {
13794 int8x16_t result;
13795 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
13796 : "=w"(result)
13797 : "w"(a), "w"(b)
13798 : /* No clobbers */);
13799 return result;
13800 }
13801
13802 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13803 vzip2q_s16 (int16x8_t a, int16x8_t b)
13804 {
13805 int16x8_t result;
13806 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
13807 : "=w"(result)
13808 : "w"(a), "w"(b)
13809 : /* No clobbers */);
13810 return result;
13811 }
13812
13813 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13814 vzip2q_s32 (int32x4_t a, int32x4_t b)
13815 {
13816 int32x4_t result;
13817 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
13818 : "=w"(result)
13819 : "w"(a), "w"(b)
13820 : /* No clobbers */);
13821 return result;
13822 }
13823
13824 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13825 vzip2q_s64 (int64x2_t a, int64x2_t b)
13826 {
13827 int64x2_t result;
13828 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
13829 : "=w"(result)
13830 : "w"(a), "w"(b)
13831 : /* No clobbers */);
13832 return result;
13833 }
13834
13835 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13836 vzip2q_u8 (uint8x16_t a, uint8x16_t b)
13837 {
13838 uint8x16_t result;
13839 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
13840 : "=w"(result)
13841 : "w"(a), "w"(b)
13842 : /* No clobbers */);
13843 return result;
13844 }
13845
13846 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13847 vzip2q_u16 (uint16x8_t a, uint16x8_t b)
13848 {
13849 uint16x8_t result;
13850 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
13851 : "=w"(result)
13852 : "w"(a), "w"(b)
13853 : /* No clobbers */);
13854 return result;
13855 }
13856
13857 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13858 vzip2q_u32 (uint32x4_t a, uint32x4_t b)
13859 {
13860 uint32x4_t result;
13861 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
13862 : "=w"(result)
13863 : "w"(a), "w"(b)
13864 : /* No clobbers */);
13865 return result;
13866 }
13867
13868 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13869 vzip2q_u64 (uint64x2_t a, uint64x2_t b)
13870 {
13871 uint64x2_t result;
13872 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
13873 : "=w"(result)
13874 : "w"(a), "w"(b)
13875 : /* No clobbers */);
13876 return result;
13877 }
13878
13879 /* End of temporary inline asm implementations. */
13880
13881 /* Start of temporary inline asm for vldn, vstn and friends. */
13882
13883 /* Create struct element types for duplicating loads.
13884
13885 Create 2 element structures of:
13886
13887 +------+----+----+----+----+
13888 | | 8 | 16 | 32 | 64 |
13889 +------+----+----+----+----+
13890 |int | Y | Y | N | N |
13891 +------+----+----+----+----+
13892 |uint | Y | Y | N | N |
13893 +------+----+----+----+----+
13894 |float | - | - | N | N |
13895 +------+----+----+----+----+
13896 |poly | Y | Y | - | - |
13897 +------+----+----+----+----+
13898
13899 Create 3 element structures of:
13900
13901 +------+----+----+----+----+
13902 | | 8 | 16 | 32 | 64 |
13903 +------+----+----+----+----+
13904 |int | Y | Y | Y | Y |
13905 +------+----+----+----+----+
13906 |uint | Y | Y | Y | Y |
13907 +------+----+----+----+----+
13908 |float | - | - | Y | Y |
13909 +------+----+----+----+----+
13910 |poly | Y | Y | - | - |
13911 +------+----+----+----+----+
13912
13913 Create 4 element structures of:
13914
13915 +------+----+----+----+----+
13916 | | 8 | 16 | 32 | 64 |
13917 +------+----+----+----+----+
13918 |int | Y | N | N | Y |
13919 +------+----+----+----+----+
13920 |uint | Y | N | N | Y |
13921 +------+----+----+----+----+
13922 |float | - | - | N | Y |
13923 +------+----+----+----+----+
13924 |poly | Y | N | - | - |
13925 +------+----+----+----+----+
13926
13927 This is required for casting memory reference. */
13928 #define __STRUCTN(t, sz, nelem) \
13929 typedef struct t ## sz ## x ## nelem ## _t { \
13930 t ## sz ## _t val[nelem]; \
13931 } t ## sz ## x ## nelem ## _t;
13932
13933 /* 2-element structs. */
13934 __STRUCTN (int, 8, 2)
13935 __STRUCTN (int, 16, 2)
13936 __STRUCTN (uint, 8, 2)
13937 __STRUCTN (uint, 16, 2)
13938 __STRUCTN (poly, 8, 2)
13939 __STRUCTN (poly, 16, 2)
13940 /* 3-element structs. */
13941 __STRUCTN (int, 8, 3)
13942 __STRUCTN (int, 16, 3)
13943 __STRUCTN (int, 32, 3)
13944 __STRUCTN (int, 64, 3)
13945 __STRUCTN (uint, 8, 3)
13946 __STRUCTN (uint, 16, 3)
13947 __STRUCTN (uint, 32, 3)
13948 __STRUCTN (uint, 64, 3)
13949 __STRUCTN (float, 32, 3)
13950 __STRUCTN (float, 64, 3)
13951 __STRUCTN (poly, 8, 3)
13952 __STRUCTN (poly, 16, 3)
13953 /* 4-element structs. */
13954 __STRUCTN (int, 8, 4)
13955 __STRUCTN (int, 64, 4)
13956 __STRUCTN (uint, 8, 4)
13957 __STRUCTN (uint, 64, 4)
13958 __STRUCTN (poly, 8, 4)
13959 __STRUCTN (float, 64, 4)
13960 #undef __STRUCTN
13961
13962 #define __LD2R_FUNC(rettype, structtype, ptrtype, \
13963 regsuffix, funcsuffix, Q) \
13964 __extension__ static __inline rettype \
13965 __attribute__ ((__always_inline__)) \
13966 vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
13967 { \
13968 rettype result; \
13969 __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
13970 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
13971 : "=Q"(result) \
13972 : "Q"(*(const structtype *)ptr) \
13973 : "memory", "v16", "v17"); \
13974 return result; \
13975 }
13976
13977 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
13978 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
13979 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
13980 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
13981 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
13982 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
13983 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
13984 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
13985 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
13986 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
13987 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
13988 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
13989 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
13990 __LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
13991 __LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
13992 __LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
13993 __LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
13994 __LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
13995 __LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
13996 __LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
13997 __LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
13998 __LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
13999 __LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
14000 __LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
14001
14002 #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
14003 lnsuffix, funcsuffix, Q) \
14004 __extension__ static __inline rettype \
14005 __attribute__ ((__always_inline__)) \
14006 vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
14007 rettype b, const int c) \
14008 { \
14009 rettype result; \
14010 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
14011 "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
14012 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
14013 : "=Q"(result) \
14014 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
14015 : "memory", "v16", "v17"); \
14016 return result; \
14017 }
14018
14019 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
14020 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
14021 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
14022 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
14023 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
14024 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
14025 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
14026 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
14027 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
14028 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
14029 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
14030 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
14031 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
14032 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
14033 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
14034 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
14035 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
14036 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
14037 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
14038 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
14039 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
14040 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
14041 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
14042 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
14043
14044 #define __LD3R_FUNC(rettype, structtype, ptrtype, \
14045 regsuffix, funcsuffix, Q) \
14046 __extension__ static __inline rettype \
14047 __attribute__ ((__always_inline__)) \
14048 vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
14049 { \
14050 rettype result; \
14051 __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
14052 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
14053 : "=Q"(result) \
14054 : "Q"(*(const structtype *)ptr) \
14055 : "memory", "v16", "v17", "v18"); \
14056 return result; \
14057 }
14058
14059 __LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
14060 __LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
14061 __LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
14062 __LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
14063 __LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
14064 __LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
14065 __LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
14066 __LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
14067 __LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
14068 __LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
14069 __LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
14070 __LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
14071 __LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
14072 __LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
14073 __LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
14074 __LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
14075 __LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
14076 __LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
14077 __LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
14078 __LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
14079 __LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
14080 __LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
14081 __LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
14082 __LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
14083
14084 #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
14085 lnsuffix, funcsuffix, Q) \
14086 __extension__ static __inline rettype \
14087 __attribute__ ((__always_inline__)) \
14088 vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
14089 rettype b, const int c) \
14090 { \
14091 rettype result; \
14092 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
14093 "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
14094 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
14095 : "=Q"(result) \
14096 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
14097 : "memory", "v16", "v17", "v18"); \
14098 return result; \
14099 }
14100
14101 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
14102 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
14103 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
14104 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
14105 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
14106 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
14107 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
14108 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
14109 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
14110 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
14111 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
14112 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
14113 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
14114 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
14115 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
14116 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
14117 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
14118 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
14119 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
14120 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
14121 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
14122 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
14123 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
14124 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
14125
14126 #define __LD4R_FUNC(rettype, structtype, ptrtype, \
14127 regsuffix, funcsuffix, Q) \
14128 __extension__ static __inline rettype \
14129 __attribute__ ((__always_inline__)) \
14130 vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
14131 { \
14132 rettype result; \
14133 __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
14134 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
14135 : "=Q"(result) \
14136 : "Q"(*(const structtype *)ptr) \
14137 : "memory", "v16", "v17", "v18", "v19"); \
14138 return result; \
14139 }
14140
14141 __LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
14142 __LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
14143 __LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
14144 __LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
14145 __LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
14146 __LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
14147 __LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
14148 __LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
14149 __LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
14150 __LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
14151 __LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
14152 __LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
14153 __LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
14154 __LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
14155 __LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
14156 __LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
14157 __LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
14158 __LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
14159 __LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
14160 __LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
14161 __LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
14162 __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
14163 __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
14164 __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
14165
14166 #define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
14167 lnsuffix, funcsuffix, Q) \
14168 __extension__ static __inline rettype \
14169 __attribute__ ((__always_inline__)) \
14170 vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
14171 rettype b, const int c) \
14172 { \
14173 rettype result; \
14174 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
14175 "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
14176 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
14177 : "=Q"(result) \
14178 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
14179 : "memory", "v16", "v17", "v18", "v19"); \
14180 return result; \
14181 }
14182
14183 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
14184 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
14185 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
14186 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
14187 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
14188 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
14189 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
14190 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
14191 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
14192 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
14193 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
14194 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
14195 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
14196 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
14197 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
14198 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
14199 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
14200 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
14201 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
14202 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
14203 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
14204 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
14205 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
14206 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
14207
14208 #define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
14209 lnsuffix, funcsuffix, Q) \
14210 typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \
14211 __extension__ static __inline void \
14212 __attribute__ ((__always_inline__)) \
14213 vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
14214 intype b, const int c) \
14215 { \
14216 __ST2_LANE_STRUCTURE_##intype *__p = \
14217 (__ST2_LANE_STRUCTURE_##intype *)ptr; \
14218 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
14219 "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
14220 : "=Q"(*__p) \
14221 : "Q"(b), "i"(c) \
14222 : "v16", "v17"); \
14223 }
14224
14225 __ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
14226 __ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
14227 __ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
14228 __ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
14229 __ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
14230 __ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
14231 __ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
14232 __ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
14233 __ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
14234 __ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
14235 __ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
14236 __ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
14237 __ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
14238 __ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
14239 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
14240 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
14241 __ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
14242 __ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
14243 __ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
14244 __ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
14245 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
14246 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
14247 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
14248 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
14249
14250 #define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
14251 lnsuffix, funcsuffix, Q) \
14252 typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \
14253 __extension__ static __inline void \
14254 __attribute__ ((__always_inline__)) \
14255 vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
14256 intype b, const int c) \
14257 { \
14258 __ST3_LANE_STRUCTURE_##intype *__p = \
14259 (__ST3_LANE_STRUCTURE_##intype *)ptr; \
14260 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
14261 "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
14262 : "=Q"(*__p) \
14263 : "Q"(b), "i"(c) \
14264 : "v16", "v17", "v18"); \
14265 }
14266
14267 __ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
14268 __ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
14269 __ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
14270 __ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
14271 __ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
14272 __ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
14273 __ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
14274 __ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
14275 __ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
14276 __ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
14277 __ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
14278 __ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
14279 __ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
14280 __ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
14281 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
14282 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
14283 __ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
14284 __ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
14285 __ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
14286 __ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
14287 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
14288 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
14289 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
14290 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
14291
14292 #define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
14293 lnsuffix, funcsuffix, Q) \
14294 typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \
14295 __extension__ static __inline void \
14296 __attribute__ ((__always_inline__)) \
14297 vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
14298 intype b, const int c) \
14299 { \
14300 __ST4_LANE_STRUCTURE_##intype *__p = \
14301 (__ST4_LANE_STRUCTURE_##intype *)ptr; \
14302 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
14303 "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
14304 : "=Q"(*__p) \
14305 : "Q"(b), "i"(c) \
14306 : "v16", "v17", "v18", "v19"); \
14307 }
14308
14309 __ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
14310 __ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
14311 __ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
14312 __ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
14313 __ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
14314 __ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
14315 __ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
14316 __ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
14317 __ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
14318 __ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
14319 __ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
14320 __ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
14321 __ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
14322 __ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
14323 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
14324 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
14325 __ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
14326 __ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
14327 __ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
14328 __ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
14329 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
14330 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
14331 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
14332 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
14333
14334 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14335 vaddlv_s32 (int32x2_t a)
14336 {
14337 int64_t result;
14338 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14339 return result;
14340 }
14341
14342 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14343 vaddlv_u32 (uint32x2_t a)
14344 {
14345 uint64_t result;
14346 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14347 return result;
14348 }
14349
14350 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14351 vpaddd_s64 (int64x2_t __a)
14352 {
14353 return __builtin_aarch64_addpdi (__a);
14354 }
14355
14356 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14357 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14358 {
14359 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
14360 }
14361
14362 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14363 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14364 {
14365 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
14366 }
14367
14368 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14369 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14370 {
14371 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
14372 }
14373
14374 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14375 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14376 {
14377 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
14378 }
14379
14380 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14381 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14382 {
14383 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
14384 }
14385
14386 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14387 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14388 {
14389 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
14390 }
14391
14392 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14393 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14394 {
14395 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
14396 }
14397
14398 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14399 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14400 {
14401 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
14402 }
14403
14404 /* Table intrinsics. */
14405
14406 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14407 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
14408 {
14409 poly8x8_t result;
14410 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14411 : "=w"(result)
14412 : "w"(a), "w"(b)
14413 : /* No clobbers */);
14414 return result;
14415 }
14416
14417 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14418 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
14419 {
14420 int8x8_t result;
14421 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14422 : "=w"(result)
14423 : "w"(a), "w"(b)
14424 : /* No clobbers */);
14425 return result;
14426 }
14427
14428 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14429 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
14430 {
14431 uint8x8_t result;
14432 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14433 : "=w"(result)
14434 : "w"(a), "w"(b)
14435 : /* No clobbers */);
14436 return result;
14437 }
14438
14439 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14440 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
14441 {
14442 poly8x16_t result;
14443 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14444 : "=w"(result)
14445 : "w"(a), "w"(b)
14446 : /* No clobbers */);
14447 return result;
14448 }
14449
14450 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14451 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
14452 {
14453 int8x16_t result;
14454 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14455 : "=w"(result)
14456 : "w"(a), "w"(b)
14457 : /* No clobbers */);
14458 return result;
14459 }
14460
14461 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14462 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
14463 {
14464 uint8x16_t result;
14465 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14466 : "=w"(result)
14467 : "w"(a), "w"(b)
14468 : /* No clobbers */);
14469 return result;
14470 }
14471
14472 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14473 vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
14474 {
14475 int8x8_t result;
14476 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14477 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14478 :"=w"(result)
14479 :"Q"(tab),"w"(idx)
14480 :"memory", "v16", "v17");
14481 return result;
14482 }
14483
14484 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14485 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
14486 {
14487 uint8x8_t result;
14488 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14489 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14490 :"=w"(result)
14491 :"Q"(tab),"w"(idx)
14492 :"memory", "v16", "v17");
14493 return result;
14494 }
14495
14496 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14497 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
14498 {
14499 poly8x8_t result;
14500 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14501 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14502 :"=w"(result)
14503 :"Q"(tab),"w"(idx)
14504 :"memory", "v16", "v17");
14505 return result;
14506 }
14507
14508 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14509 vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
14510 {
14511 int8x16_t result;
14512 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14513 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14514 :"=w"(result)
14515 :"Q"(tab),"w"(idx)
14516 :"memory", "v16", "v17");
14517 return result;
14518 }
14519
14520 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14521 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
14522 {
14523 uint8x16_t result;
14524 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14525 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14526 :"=w"(result)
14527 :"Q"(tab),"w"(idx)
14528 :"memory", "v16", "v17");
14529 return result;
14530 }
14531
14532 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14533 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
14534 {
14535 poly8x16_t result;
14536 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14537 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14538 :"=w"(result)
14539 :"Q"(tab),"w"(idx)
14540 :"memory", "v16", "v17");
14541 return result;
14542 }
14543
14544 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14545 vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
14546 {
14547 int8x8_t result;
14548 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14549 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14550 :"=w"(result)
14551 :"Q"(tab),"w"(idx)
14552 :"memory", "v16", "v17", "v18");
14553 return result;
14554 }
14555
14556 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14557 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
14558 {
14559 uint8x8_t result;
14560 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14561 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14562 :"=w"(result)
14563 :"Q"(tab),"w"(idx)
14564 :"memory", "v16", "v17", "v18");
14565 return result;
14566 }
14567
14568 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14569 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
14570 {
14571 poly8x8_t result;
14572 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14573 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14574 :"=w"(result)
14575 :"Q"(tab),"w"(idx)
14576 :"memory", "v16", "v17", "v18");
14577 return result;
14578 }
14579
14580 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14581 vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
14582 {
14583 int8x16_t result;
14584 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14585 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14586 :"=w"(result)
14587 :"Q"(tab),"w"(idx)
14588 :"memory", "v16", "v17", "v18");
14589 return result;
14590 }
14591
14592 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14593 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
14594 {
14595 uint8x16_t result;
14596 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14597 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14598 :"=w"(result)
14599 :"Q"(tab),"w"(idx)
14600 :"memory", "v16", "v17", "v18");
14601 return result;
14602 }
14603
14604 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14605 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
14606 {
14607 poly8x16_t result;
14608 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14609 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14610 :"=w"(result)
14611 :"Q"(tab),"w"(idx)
14612 :"memory", "v16", "v17", "v18");
14613 return result;
14614 }
14615
14616 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14617 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
14618 {
14619 int8x8_t result;
14620 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14621 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14622 :"=w"(result)
14623 :"Q"(tab),"w"(idx)
14624 :"memory", "v16", "v17", "v18", "v19");
14625 return result;
14626 }
14627
14628 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14629 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
14630 {
14631 uint8x8_t result;
14632 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14633 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14634 :"=w"(result)
14635 :"Q"(tab),"w"(idx)
14636 :"memory", "v16", "v17", "v18", "v19");
14637 return result;
14638 }
14639
14640 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14641 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
14642 {
14643 poly8x8_t result;
14644 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14645 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14646 :"=w"(result)
14647 :"Q"(tab),"w"(idx)
14648 :"memory", "v16", "v17", "v18", "v19");
14649 return result;
14650 }
14651
14652
14653 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14654 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
14655 {
14656 int8x16_t result;
14657 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14658 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14659 :"=w"(result)
14660 :"Q"(tab),"w"(idx)
14661 :"memory", "v16", "v17", "v18", "v19");
14662 return result;
14663 }
14664
14665 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14666 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
14667 {
14668 uint8x16_t result;
14669 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14670 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14671 :"=w"(result)
14672 :"Q"(tab),"w"(idx)
14673 :"memory", "v16", "v17", "v18", "v19");
14674 return result;
14675 }
14676
14677 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14678 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
14679 {
14680 poly8x16_t result;
14681 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14682 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14683 :"=w"(result)
14684 :"Q"(tab),"w"(idx)
14685 :"memory", "v16", "v17", "v18", "v19");
14686 return result;
14687 }
14688
14689
14690 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14691 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
14692 {
14693 int8x8_t result = r;
14694 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14695 : "+w"(result)
14696 : "w"(tab), "w"(idx)
14697 : /* No clobbers */);
14698 return result;
14699 }
14700
14701 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14702 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
14703 {
14704 uint8x8_t result = r;
14705 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14706 : "+w"(result)
14707 : "w"(tab), "w"(idx)
14708 : /* No clobbers */);
14709 return result;
14710 }
14711
14712 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14713 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
14714 {
14715 poly8x8_t result = r;
14716 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14717 : "+w"(result)
14718 : "w"(tab), "w"(idx)
14719 : /* No clobbers */);
14720 return result;
14721 }
14722
14723 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14724 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
14725 {
14726 int8x16_t result = r;
14727 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14728 : "+w"(result)
14729 : "w"(tab), "w"(idx)
14730 : /* No clobbers */);
14731 return result;
14732 }
14733
14734 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14735 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
14736 {
14737 uint8x16_t result = r;
14738 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14739 : "+w"(result)
14740 : "w"(tab), "w"(idx)
14741 : /* No clobbers */);
14742 return result;
14743 }
14744
14745 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14746 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
14747 {
14748 poly8x16_t result = r;
14749 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14750 : "+w"(result)
14751 : "w"(tab), "w"(idx)
14752 : /* No clobbers */);
14753 return result;
14754 }
14755
14756 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14757 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
14758 {
14759 int8x8_t result = r;
14760 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14761 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14762 :"+w"(result)
14763 :"Q"(tab),"w"(idx)
14764 :"memory", "v16", "v17");
14765 return result;
14766 }
14767
14768 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14769 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
14770 {
14771 uint8x8_t result = r;
14772 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14773 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14774 :"+w"(result)
14775 :"Q"(tab),"w"(idx)
14776 :"memory", "v16", "v17");
14777 return result;
14778 }
14779
14780 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14781 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
14782 {
14783 poly8x8_t result = r;
14784 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14785 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14786 :"+w"(result)
14787 :"Q"(tab),"w"(idx)
14788 :"memory", "v16", "v17");
14789 return result;
14790 }
14791
14792
14793 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14794 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
14795 {
14796 int8x16_t result = r;
14797 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14798 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14799 :"+w"(result)
14800 :"Q"(tab),"w"(idx)
14801 :"memory", "v16", "v17");
14802 return result;
14803 }
14804
14805 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14806 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
14807 {
14808 uint8x16_t result = r;
14809 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14810 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14811 :"+w"(result)
14812 :"Q"(tab),"w"(idx)
14813 :"memory", "v16", "v17");
14814 return result;
14815 }
14816
14817 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14818 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
14819 {
14820 poly8x16_t result = r;
14821 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14822 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14823 :"+w"(result)
14824 :"Q"(tab),"w"(idx)
14825 :"memory", "v16", "v17");
14826 return result;
14827 }
14828
14829
14830 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14831 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
14832 {
14833 int8x8_t result = r;
14834 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14835 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14836 :"+w"(result)
14837 :"Q"(tab),"w"(idx)
14838 :"memory", "v16", "v17", "v18");
14839 return result;
14840 }
14841
14842 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14843 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
14844 {
14845 uint8x8_t result = r;
14846 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14847 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14848 :"+w"(result)
14849 :"Q"(tab),"w"(idx)
14850 :"memory", "v16", "v17", "v18");
14851 return result;
14852 }
14853
14854 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14855 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
14856 {
14857 poly8x8_t result = r;
14858 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14859 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14860 :"+w"(result)
14861 :"Q"(tab),"w"(idx)
14862 :"memory", "v16", "v17", "v18");
14863 return result;
14864 }
14865
14866
14867 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14868 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
14869 {
14870 int8x16_t result = r;
14871 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14872 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14873 :"+w"(result)
14874 :"Q"(tab),"w"(idx)
14875 :"memory", "v16", "v17", "v18");
14876 return result;
14877 }
14878
14879 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14880 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
14881 {
14882 uint8x16_t result = r;
14883 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14884 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14885 :"+w"(result)
14886 :"Q"(tab),"w"(idx)
14887 :"memory", "v16", "v17", "v18");
14888 return result;
14889 }
14890
14891 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14892 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
14893 {
14894 poly8x16_t result = r;
14895 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14896 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14897 :"+w"(result)
14898 :"Q"(tab),"w"(idx)
14899 :"memory", "v16", "v17", "v18");
14900 return result;
14901 }
14902
14903
14904 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14905 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
14906 {
14907 int8x8_t result = r;
14908 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14909 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14910 :"+w"(result)
14911 :"Q"(tab),"w"(idx)
14912 :"memory", "v16", "v17", "v18", "v19");
14913 return result;
14914 }
14915
14916 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14917 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
14918 {
14919 uint8x8_t result = r;
14920 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14921 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14922 :"+w"(result)
14923 :"Q"(tab),"w"(idx)
14924 :"memory", "v16", "v17", "v18", "v19");
14925 return result;
14926 }
14927
14928 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14929 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
14930 {
14931 poly8x8_t result = r;
14932 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14933 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14934 :"+w"(result)
14935 :"Q"(tab),"w"(idx)
14936 :"memory", "v16", "v17", "v18", "v19");
14937 return result;
14938 }
14939
14940
14941 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14942 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
14943 {
14944 int8x16_t result = r;
14945 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14946 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14947 :"+w"(result)
14948 :"Q"(tab),"w"(idx)
14949 :"memory", "v16", "v17", "v18", "v19");
14950 return result;
14951 }
14952
14953 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14954 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
14955 {
14956 uint8x16_t result = r;
14957 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14958 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14959 :"+w"(result)
14960 :"Q"(tab),"w"(idx)
14961 :"memory", "v16", "v17", "v18", "v19");
14962 return result;
14963 }
14964
14965 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14966 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
14967 {
14968 poly8x16_t result = r;
14969 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14970 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14971 :"+w"(result)
14972 :"Q"(tab),"w"(idx)
14973 :"memory", "v16", "v17", "v18", "v19");
14974 return result;
14975 }
14976
14977 /* V7 legacy table intrinsics. */
14978
14979 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14980 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
14981 {
14982 int8x8_t result;
14983 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
14984 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14985 : "=w"(result)
14986 : "w"(temp), "w"(idx)
14987 : /* No clobbers */);
14988 return result;
14989 }
14990
14991 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14992 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
14993 {
14994 uint8x8_t result;
14995 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
14996 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14997 : "=w"(result)
14998 : "w"(temp), "w"(idx)
14999 : /* No clobbers */);
15000 return result;
15001 }
15002
15003 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15004 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
15005 {
15006 poly8x8_t result;
15007 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15008 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15009 : "=w"(result)
15010 : "w"(temp), "w"(idx)
15011 : /* No clobbers */);
15012 return result;
15013 }
15014
15015 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15016 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
15017 {
15018 int8x8_t result;
15019 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
15020 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15021 : "=w"(result)
15022 : "w"(temp), "w"(idx)
15023 : /* No clobbers */);
15024 return result;
15025 }
15026
15027 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15028 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
15029 {
15030 uint8x8_t result;
15031 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
15032 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15033 : "=w"(result)
15034 : "w"(temp), "w"(idx)
15035 : /* No clobbers */);
15036 return result;
15037 }
15038
15039 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15040 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
15041 {
15042 poly8x8_t result;
15043 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
15044 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15045 : "=w"(result)
15046 : "w"(temp), "w"(idx)
15047 : /* No clobbers */);
15048 return result;
15049 }
15050
15051 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15052 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
15053 {
15054 int8x8_t result;
15055 int8x16x2_t temp;
15056 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15057 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
15058 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15059 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15060 : "=w"(result)
15061 : "Q"(temp), "w"(idx)
15062 : "v16", "v17", "memory");
15063 return result;
15064 }
15065
15066 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15067 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
15068 {
15069 uint8x8_t result;
15070 uint8x16x2_t temp;
15071 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15072 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
15073 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15074 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15075 : "=w"(result)
15076 : "Q"(temp), "w"(idx)
15077 : "v16", "v17", "memory");
15078 return result;
15079 }
15080
15081 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15082 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
15083 {
15084 poly8x8_t result;
15085 poly8x16x2_t temp;
15086 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15087 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15088 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15089 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15090 : "=w"(result)
15091 : "Q"(temp), "w"(idx)
15092 : "v16", "v17", "memory");
15093 return result;
15094 }
15095
15096 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15097 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
15098 {
15099 int8x8_t result;
15100 int8x16x2_t temp;
15101 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15102 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
15103 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15104 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15105 : "=w"(result)
15106 : "Q"(temp), "w"(idx)
15107 : "v16", "v17", "memory");
15108 return result;
15109 }
15110
15111 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15112 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
15113 {
15114 uint8x8_t result;
15115 uint8x16x2_t temp;
15116 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15117 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
15118 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15119 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15120 : "=w"(result)
15121 : "Q"(temp), "w"(idx)
15122 : "v16", "v17", "memory");
15123 return result;
15124 }
15125
15126 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15127 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
15128 {
15129 poly8x8_t result;
15130 poly8x16x2_t temp;
15131 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15132 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15133 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15134 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15135 : "=w"(result)
15136 : "Q"(temp), "w"(idx)
15137 : "v16", "v17", "memory");
15138 return result;
15139 }
15140
15141 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15142 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
15143 {
15144 int8x8_t result = r;
15145 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
15146 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15147 : "+w"(result)
15148 : "w"(temp), "w"(idx)
15149 : /* No clobbers */);
15150 return result;
15151 }
15152
15153 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15154 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
15155 {
15156 uint8x8_t result = r;
15157 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
15158 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15159 : "+w"(result)
15160 : "w"(temp), "w"(idx)
15161 : /* No clobbers */);
15162 return result;
15163 }
15164
15165 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15166 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
15167 {
15168 poly8x8_t result = r;
15169 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
15170 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15171 : "+w"(result)
15172 : "w"(temp), "w"(idx)
15173 : /* No clobbers */);
15174 return result;
15175 }
15176
15177 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15178 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
15179 {
15180 int8x8_t result = r;
15181 int8x16x2_t temp;
15182 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15183 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
15184 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15185 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15186 : "+w"(result)
15187 : "Q"(temp), "w"(idx)
15188 : "v16", "v17", "memory");
15189 return result;
15190 }
15191
15192 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15193 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
15194 {
15195 uint8x8_t result = r;
15196 uint8x16x2_t temp;
15197 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15198 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
15199 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15200 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15201 : "+w"(result)
15202 : "Q"(temp), "w"(idx)
15203 : "v16", "v17", "memory");
15204 return result;
15205 }
15206
15207 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15208 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
15209 {
15210 poly8x8_t result = r;
15211 poly8x16x2_t temp;
15212 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15213 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15214 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
15215 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15216 : "+w"(result)
15217 : "Q"(temp), "w"(idx)
15218 : "v16", "v17", "memory");
15219 return result;
15220 }
15221
15222 /* End of temporary inline asm. */
15223
15224 /* Start of optimal implementations in approved order. */
15225
15226 /* vabs */
15227
15228 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15229 vabs_f32 (float32x2_t __a)
15230 {
15231 return __builtin_aarch64_absv2sf (__a);
15232 }
15233
15234 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15235 vabs_f64 (float64x1_t __a)
15236 {
15237 return __builtin_fabs (__a);
15238 }
15239
15240 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15241 vabs_s8 (int8x8_t __a)
15242 {
15243 return __builtin_aarch64_absv8qi (__a);
15244 }
15245
15246 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15247 vabs_s16 (int16x4_t __a)
15248 {
15249 return __builtin_aarch64_absv4hi (__a);
15250 }
15251
15252 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15253 vabs_s32 (int32x2_t __a)
15254 {
15255 return __builtin_aarch64_absv2si (__a);
15256 }
15257
15258 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15259 vabs_s64 (int64x1_t __a)
15260 {
15261 return __builtin_llabs (__a);
15262 }
15263
15264 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15265 vabsq_f32 (float32x4_t __a)
15266 {
15267 return __builtin_aarch64_absv4sf (__a);
15268 }
15269
15270 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15271 vabsq_f64 (float64x2_t __a)
15272 {
15273 return __builtin_aarch64_absv2df (__a);
15274 }
15275
15276 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15277 vabsq_s8 (int8x16_t __a)
15278 {
15279 return __builtin_aarch64_absv16qi (__a);
15280 }
15281
15282 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15283 vabsq_s16 (int16x8_t __a)
15284 {
15285 return __builtin_aarch64_absv8hi (__a);
15286 }
15287
15288 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15289 vabsq_s32 (int32x4_t __a)
15290 {
15291 return __builtin_aarch64_absv4si (__a);
15292 }
15293
15294 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15295 vabsq_s64 (int64x2_t __a)
15296 {
15297 return __builtin_aarch64_absv2di (__a);
15298 }
15299
15300 /* vadd */
15301
15302 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15303 vaddd_s64 (int64x1_t __a, int64x1_t __b)
15304 {
15305 return __a + __b;
15306 }
15307
15308 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15309 vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
15310 {
15311 return __a + __b;
15312 }
15313
15314 #if __AARCH64EB__
15315 #define __LANE0(__t) ((__t) - 1)
15316 #else
15317 #define __LANE0(__t) 0
15318 #endif
15319
15320 /* vaddv */
15321
15322 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15323 vaddv_s8 (int8x8_t __a)
15324 {
15325 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), __LANE0 (8));
15326 }
15327
15328 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15329 vaddv_s16 (int16x4_t __a)
15330 {
15331 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), __LANE0 (4));
15332 }
15333
15334 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15335 vaddv_s32 (int32x2_t __a)
15336 {
15337 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), __LANE0 (2));
15338 }
15339
15340 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15341 vaddv_u8 (uint8x8_t __a)
15342 {
15343 return vget_lane_u8 ((uint8x8_t)
15344 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
15345 __LANE0 (8));
15346 }
15347
15348 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15349 vaddv_u16 (uint16x4_t __a)
15350 {
15351 return vget_lane_u16 ((uint16x4_t)
15352 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
15353 __LANE0 (4));
15354 }
15355
15356 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15357 vaddv_u32 (uint32x2_t __a)
15358 {
15359 return vget_lane_u32 ((uint32x2_t)
15360 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
15361 __LANE0 (2));
15362 }
15363
15364 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
15365 vaddvq_s8 (int8x16_t __a)
15366 {
15367 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
15368 __LANE0 (16));
15369 }
15370
15371 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
15372 vaddvq_s16 (int16x8_t __a)
15373 {
15374 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), __LANE0 (8));
15375 }
15376
15377 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15378 vaddvq_s32 (int32x4_t __a)
15379 {
15380 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), __LANE0 (4));
15381 }
15382
15383 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15384 vaddvq_s64 (int64x2_t __a)
15385 {
15386 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), __LANE0 (2));
15387 }
15388
15389 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15390 vaddvq_u8 (uint8x16_t __a)
15391 {
15392 return vgetq_lane_u8 ((uint8x16_t)
15393 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
15394 __LANE0 (16));
15395 }
15396
15397 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15398 vaddvq_u16 (uint16x8_t __a)
15399 {
15400 return vgetq_lane_u16 ((uint16x8_t)
15401 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
15402 __LANE0 (8));
15403 }
15404
15405 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15406 vaddvq_u32 (uint32x4_t __a)
15407 {
15408 return vgetq_lane_u32 ((uint32x4_t)
15409 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
15410 __LANE0 (4));
15411 }
15412
15413 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15414 vaddvq_u64 (uint64x2_t __a)
15415 {
15416 return vgetq_lane_u64 ((uint64x2_t)
15417 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
15418 __LANE0 (2));
15419 }
15420
15421 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15422 vaddv_f32 (float32x2_t __a)
15423 {
15424 float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
15425 return vget_lane_f32 (__t, __LANE0 (2));
15426 }
15427
15428 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15429 vaddvq_f32 (float32x4_t __a)
15430 {
15431 float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
15432 return vgetq_lane_f32 (__t, __LANE0 (4));
15433 }
15434
15435 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15436 vaddvq_f64 (float64x2_t __a)
15437 {
15438 float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
15439 return vgetq_lane_f64 (__t, __LANE0 (2));
15440 }
15441
15442 /* vbsl */
15443
15444 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15445 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
15446 {
15447 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
15448 }
15449
15450 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15451 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
15452 {
15453 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
15454 }
15455
15456 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15457 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
15458 {
15459 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
15460 }
15461
15462 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15463 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
15464 {
15465 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
15466 }
15467
15468 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15469 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
15470 {
15471 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
15472 }
15473
15474 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15475 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
15476 {
15477 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
15478 }
15479
15480 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15481 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
15482 {
15483 return __builtin_aarch64_simd_bsldi_suss (__a, __b, __c);
15484 }
15485
15486 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15487 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
15488 {
15489 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
15490 }
15491
15492 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15493 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
15494 {
15495 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
15496 }
15497
15498 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15499 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
15500 {
15501 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
15502 }
15503
15504 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15505 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
15506 {
15507 return __builtin_aarch64_simd_bsldi_uuuu (__a, __b, __c);
15508 }
15509
15510 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15511 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
15512 {
15513 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
15514 }
15515
15516 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15517 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
15518 {
15519 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
15520 }
15521
15522 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15523 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
15524 {
15525 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
15526 }
15527
15528 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15529 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
15530 {
15531 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
15532 }
15533
15534 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15535 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
15536 {
15537 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
15538 }
15539
15540 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15541 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
15542 {
15543 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
15544 }
15545
15546 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15547 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
15548 {
15549 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
15550 }
15551
15552 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15553 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
15554 {
15555 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
15556 }
15557
15558 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15559 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
15560 {
15561 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
15562 }
15563
15564 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15565 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
15566 {
15567 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
15568 }
15569
15570 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15571 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
15572 {
15573 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
15574 }
15575
15576 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15577 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
15578 {
15579 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
15580 }
15581
15582 #ifdef __ARM_FEATURE_CRYPTO
15583
15584 /* vaes */
15585
15586 static __inline uint8x16_t
15587 vaeseq_u8 (uint8x16_t data, uint8x16_t key)
15588 {
15589 return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
15590 }
15591
15592 static __inline uint8x16_t
15593 vaesdq_u8 (uint8x16_t data, uint8x16_t key)
15594 {
15595 return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
15596 }
15597
15598 static __inline uint8x16_t
15599 vaesmcq_u8 (uint8x16_t data)
15600 {
15601 return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
15602 }
15603
15604 static __inline uint8x16_t
15605 vaesimcq_u8 (uint8x16_t data)
15606 {
15607 return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
15608 }
15609
15610 #endif
15611
15612 /* vcage */
15613
15614 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15615 vcages_f32 (float32_t __a, float32_t __b)
15616 {
15617 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
15618 }
15619
15620 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15621 vcage_f32 (float32x2_t __a, float32x2_t __b)
15622 {
15623 return vabs_f32 (__a) >= vabs_f32 (__b);
15624 }
15625
15626 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15627 vcageq_f32 (float32x4_t __a, float32x4_t __b)
15628 {
15629 return vabsq_f32 (__a) >= vabsq_f32 (__b);
15630 }
15631
15632 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15633 vcaged_f64 (float64_t __a, float64_t __b)
15634 {
15635 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
15636 }
15637
15638 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15639 vcageq_f64 (float64x2_t __a, float64x2_t __b)
15640 {
15641 return vabsq_f64 (__a) >= vabsq_f64 (__b);
15642 }
15643
15644 /* vcagt */
15645
15646 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15647 vcagts_f32 (float32_t __a, float32_t __b)
15648 {
15649 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
15650 }
15651
15652 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15653 vcagt_f32 (float32x2_t __a, float32x2_t __b)
15654 {
15655 return vabs_f32 (__a) > vabs_f32 (__b);
15656 }
15657
15658 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15659 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
15660 {
15661 return vabsq_f32 (__a) > vabsq_f32 (__b);
15662 }
15663
15664 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15665 vcagtd_f64 (float64_t __a, float64_t __b)
15666 {
15667 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
15668 }
15669
15670 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15671 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
15672 {
15673 return vabsq_f64 (__a) > vabsq_f64 (__b);
15674 }
15675
15676 /* vcale */
15677
15678 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15679 vcale_f32 (float32x2_t __a, float32x2_t __b)
15680 {
15681 return vabs_f32 (__a) <= vabs_f32 (__b);
15682 }
15683
15684 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15685 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
15686 {
15687 return vabsq_f32 (__a) <= vabsq_f32 (__b);
15688 }
15689
15690 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15691 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
15692 {
15693 return vabsq_f64 (__a) <= vabsq_f64 (__b);
15694 }
15695
15696 /* vcalt */
15697
15698 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15699 vcalt_f32 (float32x2_t __a, float32x2_t __b)
15700 {
15701 return vabs_f32 (__a) < vabs_f32 (__b);
15702 }
15703
15704 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15705 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
15706 {
15707 return vabsq_f32 (__a) < vabsq_f32 (__b);
15708 }
15709
15710 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15711 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
15712 {
15713 return vabsq_f64 (__a) < vabsq_f64 (__b);
15714 }
15715
15716 /* vceq - vector. */
15717
15718 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15719 vceq_f32 (float32x2_t __a, float32x2_t __b)
15720 {
15721 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
15722 }
15723
15724 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15725 vceq_f64 (float64x1_t __a, float64x1_t __b)
15726 {
15727 return __a == __b ? -1ll : 0ll;
15728 }
15729
15730 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15731 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
15732 {
15733 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15734 (int8x8_t) __b);
15735 }
15736
15737 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15738 vceq_s8 (int8x8_t __a, int8x8_t __b)
15739 {
15740 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
15741 }
15742
15743 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15744 vceq_s16 (int16x4_t __a, int16x4_t __b)
15745 {
15746 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
15747 }
15748
15749 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15750 vceq_s32 (int32x2_t __a, int32x2_t __b)
15751 {
15752 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
15753 }
15754
15755 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15756 vceq_s64 (int64x1_t __a, int64x1_t __b)
15757 {
15758 return __a == __b ? -1ll : 0ll;
15759 }
15760
15761 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15762 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
15763 {
15764 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15765 (int8x8_t) __b);
15766 }
15767
15768 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15769 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
15770 {
15771 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
15772 (int16x4_t) __b);
15773 }
15774
15775 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15776 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
15777 {
15778 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
15779 (int32x2_t) __b);
15780 }
15781
15782 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15783 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
15784 {
15785 return __a == __b ? -1ll : 0ll;
15786 }
15787
15788 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15789 vceqq_f32 (float32x4_t __a, float32x4_t __b)
15790 {
15791 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
15792 }
15793
15794 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15795 vceqq_f64 (float64x2_t __a, float64x2_t __b)
15796 {
15797 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
15798 }
15799
15800 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15801 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
15802 {
15803 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15804 (int8x16_t) __b);
15805 }
15806
15807 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15808 vceqq_s8 (int8x16_t __a, int8x16_t __b)
15809 {
15810 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
15811 }
15812
15813 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15814 vceqq_s16 (int16x8_t __a, int16x8_t __b)
15815 {
15816 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
15817 }
15818
15819 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15820 vceqq_s32 (int32x4_t __a, int32x4_t __b)
15821 {
15822 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
15823 }
15824
15825 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15826 vceqq_s64 (int64x2_t __a, int64x2_t __b)
15827 {
15828 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
15829 }
15830
15831 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15832 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
15833 {
15834 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15835 (int8x16_t) __b);
15836 }
15837
15838 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15839 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
15840 {
15841 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
15842 (int16x8_t) __b);
15843 }
15844
15845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15846 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
15847 {
15848 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
15849 (int32x4_t) __b);
15850 }
15851
15852 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15853 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
15854 {
15855 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
15856 (int64x2_t) __b);
15857 }
15858
15859 /* vceq - scalar. */
15860
15861 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15862 vceqs_f32 (float32_t __a, float32_t __b)
15863 {
15864 return __a == __b ? -1 : 0;
15865 }
15866
15867 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15868 vceqd_s64 (int64x1_t __a, int64x1_t __b)
15869 {
15870 return __a == __b ? -1ll : 0ll;
15871 }
15872
15873 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15874 vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
15875 {
15876 return __a == __b ? -1ll : 0ll;
15877 }
15878
15879 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15880 vceqd_f64 (float64_t __a, float64_t __b)
15881 {
15882 return __a == __b ? -1ll : 0ll;
15883 }
15884
15885 /* vceqz - vector. */
15886
15887 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15888 vceqz_f32 (float32x2_t __a)
15889 {
15890 float32x2_t __b = {0.0f, 0.0f};
15891 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
15892 }
15893
15894 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15895 vceqz_f64 (float64x1_t __a)
15896 {
15897 return __a == 0.0 ? -1ll : 0ll;
15898 }
15899
15900 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15901 vceqz_p8 (poly8x8_t __a)
15902 {
15903 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15904 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15905 (int8x8_t) __b);
15906 }
15907
15908 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15909 vceqz_s8 (int8x8_t __a)
15910 {
15911 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15912 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
15913 }
15914
15915 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15916 vceqz_s16 (int16x4_t __a)
15917 {
15918 int16x4_t __b = {0, 0, 0, 0};
15919 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
15920 }
15921
15922 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15923 vceqz_s32 (int32x2_t __a)
15924 {
15925 int32x2_t __b = {0, 0};
15926 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
15927 }
15928
15929 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15930 vceqz_s64 (int64x1_t __a)
15931 {
15932 return __a == 0ll ? -1ll : 0ll;
15933 }
15934
15935 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15936 vceqz_u8 (uint8x8_t __a)
15937 {
15938 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15939 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15940 (int8x8_t) __b);
15941 }
15942
15943 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15944 vceqz_u16 (uint16x4_t __a)
15945 {
15946 uint16x4_t __b = {0, 0, 0, 0};
15947 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
15948 (int16x4_t) __b);
15949 }
15950
15951 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15952 vceqz_u32 (uint32x2_t __a)
15953 {
15954 uint32x2_t __b = {0, 0};
15955 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
15956 (int32x2_t) __b);
15957 }
15958
15959 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15960 vceqz_u64 (uint64x1_t __a)
15961 {
15962 return __a == 0ll ? -1ll : 0ll;
15963 }
15964
15965 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15966 vceqzq_f32 (float32x4_t __a)
15967 {
15968 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15969 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
15970 }
15971
15972 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15973 vceqzq_f64 (float64x2_t __a)
15974 {
15975 float64x2_t __b = {0.0, 0.0};
15976 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
15977 }
15978
15979 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15980 vceqzq_p8 (poly8x16_t __a)
15981 {
15982 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15983 0, 0, 0, 0, 0, 0, 0, 0};
15984 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15985 (int8x16_t) __b);
15986 }
15987
15988 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15989 vceqzq_s8 (int8x16_t __a)
15990 {
15991 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15992 0, 0, 0, 0, 0, 0, 0, 0};
15993 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
15994 }
15995
15996 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15997 vceqzq_s16 (int16x8_t __a)
15998 {
15999 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16000 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
16001 }
16002
16003 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16004 vceqzq_s32 (int32x4_t __a)
16005 {
16006 int32x4_t __b = {0, 0, 0, 0};
16007 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
16008 }
16009
16010 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16011 vceqzq_s64 (int64x2_t __a)
16012 {
16013 int64x2_t __b = {0, 0};
16014 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
16015 }
16016
16017 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16018 vceqzq_u8 (uint8x16_t __a)
16019 {
16020 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16021 0, 0, 0, 0, 0, 0, 0, 0};
16022 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
16023 (int8x16_t) __b);
16024 }
16025
16026 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16027 vceqzq_u16 (uint16x8_t __a)
16028 {
16029 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16030 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
16031 (int16x8_t) __b);
16032 }
16033
16034 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16035 vceqzq_u32 (uint32x4_t __a)
16036 {
16037 uint32x4_t __b = {0, 0, 0, 0};
16038 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
16039 (int32x4_t) __b);
16040 }
16041
16042 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16043 vceqzq_u64 (uint64x2_t __a)
16044 {
16045 uint64x2_t __b = {0, 0};
16046 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
16047 (int64x2_t) __b);
16048 }
16049
16050 /* vceqz - scalar. */
16051
16052 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16053 vceqzs_f32 (float32_t __a)
16054 {
16055 return __a == 0.0f ? -1 : 0;
16056 }
16057
16058 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16059 vceqzd_s64 (int64x1_t __a)
16060 {
16061 return __a == 0 ? -1ll : 0ll;
16062 }
16063
16064 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16065 vceqzd_u64 (int64x1_t __a)
16066 {
16067 return __a == 0 ? -1ll : 0ll;
16068 }
16069
16070 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16071 vceqzd_f64 (float64_t __a)
16072 {
16073 return __a == 0.0 ? -1ll : 0ll;
16074 }
16075
16076 /* vcge - vector. */
16077
16078 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16079 vcge_f32 (float32x2_t __a, float32x2_t __b)
16080 {
16081 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
16082 }
16083
16084 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16085 vcge_f64 (float64x1_t __a, float64x1_t __b)
16086 {
16087 return __a >= __b ? -1ll : 0ll;
16088 }
16089
16090 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16091 vcge_p8 (poly8x8_t __a, poly8x8_t __b)
16092 {
16093 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
16094 (int8x8_t) __b);
16095 }
16096
16097 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16098 vcge_s8 (int8x8_t __a, int8x8_t __b)
16099 {
16100 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
16101 }
16102
16103 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16104 vcge_s16 (int16x4_t __a, int16x4_t __b)
16105 {
16106 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
16107 }
16108
16109 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16110 vcge_s32 (int32x2_t __a, int32x2_t __b)
16111 {
16112 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
16113 }
16114
16115 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16116 vcge_s64 (int64x1_t __a, int64x1_t __b)
16117 {
16118 return __a >= __b ? -1ll : 0ll;
16119 }
16120
16121 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16122 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
16123 {
16124 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
16125 (int8x8_t) __b);
16126 }
16127
16128 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16129 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
16130 {
16131 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16132 (int16x4_t) __b);
16133 }
16134
16135 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16136 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
16137 {
16138 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16139 (int32x2_t) __b);
16140 }
16141
16142 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16143 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
16144 {
16145 return __a >= __b ? -1ll : 0ll;
16146 }
16147
16148 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16149 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
16150 {
16151 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16152 }
16153
16154 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16155 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
16156 {
16157 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16158 }
16159
16160 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16161 vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
16162 {
16163 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16164 (int8x16_t) __b);
16165 }
16166
16167 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16168 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
16169 {
16170 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16171 }
16172
16173 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16174 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
16175 {
16176 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16177 }
16178
16179 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16180 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
16181 {
16182 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16183 }
16184
16185 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16186 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
16187 {
16188 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16189 }
16190
16191 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16192 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
16193 {
16194 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16195 (int8x16_t) __b);
16196 }
16197
16198 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16199 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
16200 {
16201 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16202 (int16x8_t) __b);
16203 }
16204
16205 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16206 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
16207 {
16208 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16209 (int32x4_t) __b);
16210 }
16211
16212 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16213 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
16214 {
16215 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16216 (int64x2_t) __b);
16217 }
16218
16219 /* vcge - scalar. */
16220
16221 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16222 vcges_f32 (float32_t __a, float32_t __b)
16223 {
16224 return __a >= __b ? -1 : 0;
16225 }
16226
16227 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16228 vcged_s64 (int64x1_t __a, int64x1_t __b)
16229 {
16230 return __a >= __b ? -1ll : 0ll;
16231 }
16232
16233 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16234 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
16235 {
16236 return __a >= __b ? -1ll : 0ll;
16237 }
16238
16239 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16240 vcged_f64 (float64_t __a, float64_t __b)
16241 {
16242 return __a >= __b ? -1ll : 0ll;
16243 }
16244
16245 /* vcgez - vector. */
16246
16247 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16248 vcgez_f32 (float32x2_t __a)
16249 {
16250 float32x2_t __b = {0.0f, 0.0f};
16251 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
16252 }
16253
16254 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16255 vcgez_f64 (float64x1_t __a)
16256 {
16257 return __a >= 0.0 ? -1ll : 0ll;
16258 }
16259
16260 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16261 vcgez_p8 (poly8x8_t __a)
16262 {
16263 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16264 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
16265 (int8x8_t) __b);
16266 }
16267
16268 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16269 vcgez_s8 (int8x8_t __a)
16270 {
16271 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16272 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
16273 }
16274
16275 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16276 vcgez_s16 (int16x4_t __a)
16277 {
16278 int16x4_t __b = {0, 0, 0, 0};
16279 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
16280 }
16281
16282 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16283 vcgez_s32 (int32x2_t __a)
16284 {
16285 int32x2_t __b = {0, 0};
16286 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
16287 }
16288
16289 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16290 vcgez_s64 (int64x1_t __a)
16291 {
16292 return __a >= 0ll ? -1ll : 0ll;
16293 }
16294
16295 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16296 vcgez_u8 (uint8x8_t __a)
16297 {
16298 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16299 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
16300 (int8x8_t) __b);
16301 }
16302
16303 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16304 vcgez_u16 (uint16x4_t __a)
16305 {
16306 uint16x4_t __b = {0, 0, 0, 0};
16307 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16308 (int16x4_t) __b);
16309 }
16310
16311 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16312 vcgez_u32 (uint32x2_t __a)
16313 {
16314 uint32x2_t __b = {0, 0};
16315 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16316 (int32x2_t) __b);
16317 }
16318
16319 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16320 vcgez_u64 (uint64x1_t __a)
16321 {
16322 return __a >= 0ll ? -1ll : 0ll;
16323 }
16324
16325 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16326 vcgezq_f32 (float32x4_t __a)
16327 {
16328 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
16329 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16330 }
16331
16332 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16333 vcgezq_f64 (float64x2_t __a)
16334 {
16335 float64x2_t __b = {0.0, 0.0};
16336 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16337 }
16338
16339 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16340 vcgezq_p8 (poly8x16_t __a)
16341 {
16342 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16343 0, 0, 0, 0, 0, 0, 0, 0};
16344 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16345 (int8x16_t) __b);
16346 }
16347
16348 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16349 vcgezq_s8 (int8x16_t __a)
16350 {
16351 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16352 0, 0, 0, 0, 0, 0, 0, 0};
16353 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16354 }
16355
16356 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16357 vcgezq_s16 (int16x8_t __a)
16358 {
16359 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16360 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16361 }
16362
16363 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16364 vcgezq_s32 (int32x4_t __a)
16365 {
16366 int32x4_t __b = {0, 0, 0, 0};
16367 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16368 }
16369
16370 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16371 vcgezq_s64 (int64x2_t __a)
16372 {
16373 int64x2_t __b = {0, 0};
16374 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16375 }
16376
16377 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16378 vcgezq_u8 (uint8x16_t __a)
16379 {
16380 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16381 0, 0, 0, 0, 0, 0, 0, 0};
16382 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16383 (int8x16_t) __b);
16384 }
16385
16386 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16387 vcgezq_u16 (uint16x8_t __a)
16388 {
16389 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16390 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16391 (int16x8_t) __b);
16392 }
16393
16394 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16395 vcgezq_u32 (uint32x4_t __a)
16396 {
16397 uint32x4_t __b = {0, 0, 0, 0};
16398 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16399 (int32x4_t) __b);
16400 }
16401
16402 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16403 vcgezq_u64 (uint64x2_t __a)
16404 {
16405 uint64x2_t __b = {0, 0};
16406 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16407 (int64x2_t) __b);
16408 }
16409
16410 /* vcgez - scalar. */
16411
16412 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16413 vcgezs_f32 (float32_t __a)
16414 {
16415 return __a >= 0.0f ? -1 : 0;
16416 }
16417
16418 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16419 vcgezd_s64 (int64x1_t __a)
16420 {
16421 return __a >= 0 ? -1ll : 0ll;
16422 }
16423
16424 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16425 vcgezd_u64 (int64x1_t __a)
16426 {
16427 return __a >= 0 ? -1ll : 0ll;
16428 }
16429
16430 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16431 vcgezd_f64 (float64_t __a)
16432 {
16433 return __a >= 0.0 ? -1ll : 0ll;
16434 }
16435
16436 /* vcgt - vector. */
16437
16438 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16439 vcgt_f32 (float32x2_t __a, float32x2_t __b)
16440 {
16441 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
16442 }
16443
16444 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16445 vcgt_f64 (float64x1_t __a, float64x1_t __b)
16446 {
16447 return __a > __b ? -1ll : 0ll;
16448 }
16449
16450 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16451 vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
16452 {
16453 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
16454 (int8x8_t) __b);
16455 }
16456
16457 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16458 vcgt_s8 (int8x8_t __a, int8x8_t __b)
16459 {
16460 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
16461 }
16462
16463 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16464 vcgt_s16 (int16x4_t __a, int16x4_t __b)
16465 {
16466 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
16467 }
16468
16469 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16470 vcgt_s32 (int32x2_t __a, int32x2_t __b)
16471 {
16472 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
16473 }
16474
16475 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16476 vcgt_s64 (int64x1_t __a, int64x1_t __b)
16477 {
16478 return __a > __b ? -1ll : 0ll;
16479 }
16480
16481 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16482 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
16483 {
16484 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
16485 (int8x8_t) __b);
16486 }
16487
16488 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16489 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
16490 {
16491 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
16492 (int16x4_t) __b);
16493 }
16494
16495 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16496 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
16497 {
16498 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
16499 (int32x2_t) __b);
16500 }
16501
16502 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16503 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
16504 {
16505 return __a > __b ? -1ll : 0ll;
16506 }
16507
16508 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16509 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
16510 {
16511 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
16512 }
16513
16514 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16515 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
16516 {
16517 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
16518 }
16519
16520 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16521 vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
16522 {
16523 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
16524 (int8x16_t) __b);
16525 }
16526
16527 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16528 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
16529 {
16530 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
16531 }
16532
16533 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16534 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
16535 {
16536 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
16537 }
16538
16539 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16540 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
16541 {
16542 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
16543 }
16544
16545 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16546 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
16547 {
16548 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
16549 }
16550
16551 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16552 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
16553 {
16554 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
16555 (int8x16_t) __b);
16556 }
16557
16558 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16559 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
16560 {
16561 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
16562 (int16x8_t) __b);
16563 }
16564
16565 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16566 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
16567 {
16568 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
16569 (int32x4_t) __b);
16570 }
16571
16572 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16573 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
16574 {
16575 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
16576 (int64x2_t) __b);
16577 }
16578
16579 /* vcgt - scalar. */
16580
16581 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16582 vcgts_f32 (float32_t __a, float32_t __b)
16583 {
16584 return __a > __b ? -1 : 0;
16585 }
16586
16587 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16588 vcgtd_s64 (int64x1_t __a, int64x1_t __b)
16589 {
16590 return __a > __b ? -1ll : 0ll;
16591 }
16592
16593 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16594 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
16595 {
16596 return __a > __b ? -1ll : 0ll;
16597 }
16598
16599 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16600 vcgtd_f64 (float64_t __a, float64_t __b)
16601 {
16602 return __a > __b ? -1ll : 0ll;
16603 }
16604
16605 /* vcgtz - vector. */
16606
16607 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16608 vcgtz_f32 (float32x2_t __a)
16609 {
16610 float32x2_t __b = {0.0f, 0.0f};
16611 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
16612 }
16613
16614 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16615 vcgtz_f64 (float64x1_t __a)
16616 {
16617 return __a > 0.0 ? -1ll : 0ll;
16618 }
16619
16620 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16621 vcgtz_p8 (poly8x8_t __a)
16622 {
16623 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16624 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
16625 (int8x8_t) __b);
16626 }
16627
16628 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16629 vcgtz_s8 (int8x8_t __a)
16630 {
16631 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16632 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
16633 }
16634
16635 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16636 vcgtz_s16 (int16x4_t __a)
16637 {
16638 int16x4_t __b = {0, 0, 0, 0};
16639 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
16640 }
16641
16642 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16643 vcgtz_s32 (int32x2_t __a)
16644 {
16645 int32x2_t __b = {0, 0};
16646 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
16647 }
16648
16649 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16650 vcgtz_s64 (int64x1_t __a)
16651 {
16652 return __a > 0ll ? -1ll : 0ll;
16653 }
16654
16655 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16656 vcgtz_u8 (uint8x8_t __a)
16657 {
16658 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16659 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
16660 (int8x8_t) __b);
16661 }
16662
16663 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16664 vcgtz_u16 (uint16x4_t __a)
16665 {
16666 uint16x4_t __b = {0, 0, 0, 0};
16667 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
16668 (int16x4_t) __b);
16669 }
16670
16671 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16672 vcgtz_u32 (uint32x2_t __a)
16673 {
16674 uint32x2_t __b = {0, 0};
16675 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
16676 (int32x2_t) __b);
16677 }
16678
16679 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16680 vcgtz_u64 (uint64x1_t __a)
16681 {
16682 return __a > 0ll ? -1ll : 0ll;
16683 }
16684
16685 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16686 vcgtzq_f32 (float32x4_t __a)
16687 {
16688 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
16689 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
16690 }
16691
16692 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16693 vcgtzq_f64 (float64x2_t __a)
16694 {
16695 float64x2_t __b = {0.0, 0.0};
16696 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
16697 }
16698
16699 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16700 vcgtzq_p8 (poly8x16_t __a)
16701 {
16702 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16703 0, 0, 0, 0, 0, 0, 0, 0};
16704 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
16705 (int8x16_t) __b);
16706 }
16707
16708 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16709 vcgtzq_s8 (int8x16_t __a)
16710 {
16711 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16712 0, 0, 0, 0, 0, 0, 0, 0};
16713 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
16714 }
16715
16716 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16717 vcgtzq_s16 (int16x8_t __a)
16718 {
16719 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16720 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
16721 }
16722
16723 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16724 vcgtzq_s32 (int32x4_t __a)
16725 {
16726 int32x4_t __b = {0, 0, 0, 0};
16727 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
16728 }
16729
16730 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16731 vcgtzq_s64 (int64x2_t __a)
16732 {
16733 int64x2_t __b = {0, 0};
16734 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
16735 }
16736
16737 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16738 vcgtzq_u8 (uint8x16_t __a)
16739 {
16740 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16741 0, 0, 0, 0, 0, 0, 0, 0};
16742 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
16743 (int8x16_t) __b);
16744 }
16745
16746 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16747 vcgtzq_u16 (uint16x8_t __a)
16748 {
16749 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16750 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
16751 (int16x8_t) __b);
16752 }
16753
16754 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16755 vcgtzq_u32 (uint32x4_t __a)
16756 {
16757 uint32x4_t __b = {0, 0, 0, 0};
16758 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
16759 (int32x4_t) __b);
16760 }
16761
16762 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16763 vcgtzq_u64 (uint64x2_t __a)
16764 {
16765 uint64x2_t __b = {0, 0};
16766 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
16767 (int64x2_t) __b);
16768 }
16769
16770 /* vcgtz - scalar. */
16771
16772 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16773 vcgtzs_f32 (float32_t __a)
16774 {
16775 return __a > 0.0f ? -1 : 0;
16776 }
16777
16778 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16779 vcgtzd_s64 (int64x1_t __a)
16780 {
16781 return __a > 0 ? -1ll : 0ll;
16782 }
16783
16784 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16785 vcgtzd_u64 (int64x1_t __a)
16786 {
16787 return __a > 0 ? -1ll : 0ll;
16788 }
16789
16790 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16791 vcgtzd_f64 (float64_t __a)
16792 {
16793 return __a > 0.0 ? -1ll : 0ll;
16794 }
16795
16796 /* vcle - vector. */
16797
16798 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16799 vcle_f32 (float32x2_t __a, float32x2_t __b)
16800 {
16801 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
16802 }
16803
16804 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16805 vcle_f64 (float64x1_t __a, float64x1_t __b)
16806 {
16807 return __a <= __b ? -1ll : 0ll;
16808 }
16809
16810 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16811 vcle_p8 (poly8x8_t __a, poly8x8_t __b)
16812 {
16813 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
16814 (int8x8_t) __a);
16815 }
16816
16817 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16818 vcle_s8 (int8x8_t __a, int8x8_t __b)
16819 {
16820 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
16821 }
16822
16823 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16824 vcle_s16 (int16x4_t __a, int16x4_t __b)
16825 {
16826 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
16827 }
16828
16829 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16830 vcle_s32 (int32x2_t __a, int32x2_t __b)
16831 {
16832 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
16833 }
16834
16835 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16836 vcle_s64 (int64x1_t __a, int64x1_t __b)
16837 {
16838 return __a <= __b ? -1ll : 0ll;
16839 }
16840
16841 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16842 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
16843 {
16844 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
16845 (int8x8_t) __a);
16846 }
16847
16848 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16849 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
16850 {
16851 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
16852 (int16x4_t) __a);
16853 }
16854
16855 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16856 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
16857 {
16858 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
16859 (int32x2_t) __a);
16860 }
16861
16862 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16863 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
16864 {
16865 return __a <= __b ? -1ll : 0ll;
16866 }
16867
16868 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16869 vcleq_f32 (float32x4_t __a, float32x4_t __b)
16870 {
16871 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
16872 }
16873
16874 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16875 vcleq_f64 (float64x2_t __a, float64x2_t __b)
16876 {
16877 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
16878 }
16879
16880 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16881 vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
16882 {
16883 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
16884 (int8x16_t) __a);
16885 }
16886
16887 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16888 vcleq_s8 (int8x16_t __a, int8x16_t __b)
16889 {
16890 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
16891 }
16892
16893 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16894 vcleq_s16 (int16x8_t __a, int16x8_t __b)
16895 {
16896 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
16897 }
16898
16899 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16900 vcleq_s32 (int32x4_t __a, int32x4_t __b)
16901 {
16902 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
16903 }
16904
16905 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16906 vcleq_s64 (int64x2_t __a, int64x2_t __b)
16907 {
16908 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
16909 }
16910
16911 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16912 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
16913 {
16914 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
16915 (int8x16_t) __a);
16916 }
16917
16918 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16919 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
16920 {
16921 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
16922 (int16x8_t) __a);
16923 }
16924
16925 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16926 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
16927 {
16928 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
16929 (int32x4_t) __a);
16930 }
16931
16932 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16933 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
16934 {
16935 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
16936 (int64x2_t) __a);
16937 }
16938
16939 /* vcle - scalar. */
16940
16941 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16942 vcles_f32 (float32_t __a, float32_t __b)
16943 {
16944 return __a <= __b ? -1 : 0;
16945 }
16946
16947 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16948 vcled_s64 (int64x1_t __a, int64x1_t __b)
16949 {
16950 return __a <= __b ? -1ll : 0ll;
16951 }
16952
16953 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16954 vcled_u64 (uint64x1_t __a, uint64x1_t __b)
16955 {
16956 return __a <= __b ? -1ll : 0ll;
16957 }
16958
16959 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16960 vcled_f64 (float64_t __a, float64_t __b)
16961 {
16962 return __a <= __b ? -1ll : 0ll;
16963 }
16964
16965 /* vclez - vector. */
16966
16967 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16968 vclez_f32 (float32x2_t __a)
16969 {
16970 float32x2_t __b = {0.0f, 0.0f};
16971 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
16972 }
16973
16974 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16975 vclez_f64 (float64x1_t __a)
16976 {
16977 return __a <= 0.0 ? -1ll : 0ll;
16978 }
16979
16980 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16981 vclez_p8 (poly8x8_t __a)
16982 {
16983 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16984 return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
16985 (int8x8_t) __b);
16986 }
16987
16988 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16989 vclez_s8 (int8x8_t __a)
16990 {
16991 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16992 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
16993 }
16994
16995 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16996 vclez_s16 (int16x4_t __a)
16997 {
16998 int16x4_t __b = {0, 0, 0, 0};
16999 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
17000 }
17001
17002 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17003 vclez_s32 (int32x2_t __a)
17004 {
17005 int32x2_t __b = {0, 0};
17006 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
17007 }
17008
17009 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17010 vclez_s64 (int64x1_t __a)
17011 {
17012 return __a <= 0ll ? -1ll : 0ll;
17013 }
17014
17015 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17016 vclez_u64 (uint64x1_t __a)
17017 {
17018 return __a <= 0ll ? -1ll : 0ll;
17019 }
17020
17021 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17022 vclezq_f32 (float32x4_t __a)
17023 {
17024 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17025 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
17026 }
17027
17028 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17029 vclezq_f64 (float64x2_t __a)
17030 {
17031 float64x2_t __b = {0.0, 0.0};
17032 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
17033 }
17034
17035 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17036 vclezq_p8 (poly8x16_t __a)
17037 {
17038 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17039 0, 0, 0, 0, 0, 0, 0, 0};
17040 return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
17041 (int8x16_t) __b);
17042 }
17043
17044 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17045 vclezq_s8 (int8x16_t __a)
17046 {
17047 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17048 0, 0, 0, 0, 0, 0, 0, 0};
17049 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
17050 }
17051
17052 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17053 vclezq_s16 (int16x8_t __a)
17054 {
17055 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17056 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
17057 }
17058
17059 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17060 vclezq_s32 (int32x4_t __a)
17061 {
17062 int32x4_t __b = {0, 0, 0, 0};
17063 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
17064 }
17065
17066 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17067 vclezq_s64 (int64x2_t __a)
17068 {
17069 int64x2_t __b = {0, 0};
17070 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
17071 }
17072
17073 /* vclez - scalar. */
17074
17075 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17076 vclezs_f32 (float32_t __a)
17077 {
17078 return __a <= 0.0f ? -1 : 0;
17079 }
17080
17081 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17082 vclezd_s64 (int64x1_t __a)
17083 {
17084 return __a <= 0 ? -1ll : 0ll;
17085 }
17086
17087 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17088 vclezd_u64 (int64x1_t __a)
17089 {
17090 return __a <= 0 ? -1ll : 0ll;
17091 }
17092
17093 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17094 vclezd_f64 (float64_t __a)
17095 {
17096 return __a <= 0.0 ? -1ll : 0ll;
17097 }
17098
17099 /* vclt - vector. */
17100
17101 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17102 vclt_f32 (float32x2_t __a, float32x2_t __b)
17103 {
17104 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
17105 }
17106
17107 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17108 vclt_f64 (float64x1_t __a, float64x1_t __b)
17109 {
17110 return __a < __b ? -1ll : 0ll;
17111 }
17112
17113 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17114 vclt_p8 (poly8x8_t __a, poly8x8_t __b)
17115 {
17116 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
17117 (int8x8_t) __a);
17118 }
17119
17120 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17121 vclt_s8 (int8x8_t __a, int8x8_t __b)
17122 {
17123 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
17124 }
17125
17126 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17127 vclt_s16 (int16x4_t __a, int16x4_t __b)
17128 {
17129 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
17130 }
17131
17132 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17133 vclt_s32 (int32x2_t __a, int32x2_t __b)
17134 {
17135 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
17136 }
17137
17138 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17139 vclt_s64 (int64x1_t __a, int64x1_t __b)
17140 {
17141 return __a < __b ? -1ll : 0ll;
17142 }
17143
17144 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17145 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
17146 {
17147 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
17148 (int8x8_t) __a);
17149 }
17150
17151 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17152 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
17153 {
17154 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
17155 (int16x4_t) __a);
17156 }
17157
17158 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17159 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
17160 {
17161 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
17162 (int32x2_t) __a);
17163 }
17164
17165 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17166 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
17167 {
17168 return __a < __b ? -1ll : 0ll;
17169 }
17170
17171 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17172 vcltq_f32 (float32x4_t __a, float32x4_t __b)
17173 {
17174 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
17175 }
17176
17177 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17178 vcltq_f64 (float64x2_t __a, float64x2_t __b)
17179 {
17180 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
17181 }
17182
17183 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17184 vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
17185 {
17186 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
17187 (int8x16_t) __a);
17188 }
17189
17190 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17191 vcltq_s8 (int8x16_t __a, int8x16_t __b)
17192 {
17193 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
17194 }
17195
17196 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17197 vcltq_s16 (int16x8_t __a, int16x8_t __b)
17198 {
17199 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
17200 }
17201
17202 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17203 vcltq_s32 (int32x4_t __a, int32x4_t __b)
17204 {
17205 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
17206 }
17207
17208 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17209 vcltq_s64 (int64x2_t __a, int64x2_t __b)
17210 {
17211 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
17212 }
17213
17214 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17215 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
17216 {
17217 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
17218 (int8x16_t) __a);
17219 }
17220
17221 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17222 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
17223 {
17224 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
17225 (int16x8_t) __a);
17226 }
17227
17228 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17229 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
17230 {
17231 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
17232 (int32x4_t) __a);
17233 }
17234
17235 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17236 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
17237 {
17238 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
17239 (int64x2_t) __a);
17240 }
17241
17242 /* vclt - scalar. */
17243
17244 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17245 vclts_f32 (float32_t __a, float32_t __b)
17246 {
17247 return __a < __b ? -1 : 0;
17248 }
17249
17250 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17251 vcltd_s64 (int64x1_t __a, int64x1_t __b)
17252 {
17253 return __a < __b ? -1ll : 0ll;
17254 }
17255
17256 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17257 vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
17258 {
17259 return __a < __b ? -1ll : 0ll;
17260 }
17261
17262 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17263 vcltd_f64 (float64_t __a, float64_t __b)
17264 {
17265 return __a < __b ? -1ll : 0ll;
17266 }
17267
17268 /* vcltz - vector. */
17269
17270 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17271 vcltz_f32 (float32x2_t __a)
17272 {
17273 float32x2_t __b = {0.0f, 0.0f};
17274 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
17275 }
17276
17277 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17278 vcltz_f64 (float64x1_t __a)
17279 {
17280 return __a < 0.0 ? -1ll : 0ll;
17281 }
17282
17283 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17284 vcltz_p8 (poly8x8_t __a)
17285 {
17286 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17287 return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
17288 (int8x8_t) __b);
17289 }
17290
17291 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17292 vcltz_s8 (int8x8_t __a)
17293 {
17294 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17295 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
17296 }
17297
17298 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17299 vcltz_s16 (int16x4_t __a)
17300 {
17301 int16x4_t __b = {0, 0, 0, 0};
17302 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
17303 }
17304
17305 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17306 vcltz_s32 (int32x2_t __a)
17307 {
17308 int32x2_t __b = {0, 0};
17309 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
17310 }
17311
17312 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17313 vcltz_s64 (int64x1_t __a)
17314 {
17315 return __a < 0ll ? -1ll : 0ll;
17316 }
17317
17318 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17319 vcltzq_f32 (float32x4_t __a)
17320 {
17321 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17322 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
17323 }
17324
17325 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17326 vcltzq_f64 (float64x2_t __a)
17327 {
17328 float64x2_t __b = {0.0, 0.0};
17329 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
17330 }
17331
17332 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17333 vcltzq_p8 (poly8x16_t __a)
17334 {
17335 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17336 0, 0, 0, 0, 0, 0, 0, 0};
17337 return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
17338 (int8x16_t) __b);
17339 }
17340
17341 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17342 vcltzq_s8 (int8x16_t __a)
17343 {
17344 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17345 0, 0, 0, 0, 0, 0, 0, 0};
17346 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
17347 }
17348
17349 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17350 vcltzq_s16 (int16x8_t __a)
17351 {
17352 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17353 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
17354 }
17355
17356 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17357 vcltzq_s32 (int32x4_t __a)
17358 {
17359 int32x4_t __b = {0, 0, 0, 0};
17360 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
17361 }
17362
17363 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17364 vcltzq_s64 (int64x2_t __a)
17365 {
17366 int64x2_t __b = {0, 0};
17367 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
17368 }
17369
17370 /* vcltz - scalar. */
17371
17372 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17373 vcltzs_f32 (float32_t __a)
17374 {
17375 return __a < 0.0f ? -1 : 0;
17376 }
17377
17378 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17379 vcltzd_s64 (int64x1_t __a)
17380 {
17381 return __a < 0 ? -1ll : 0ll;
17382 }
17383
17384 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17385 vcltzd_u64 (int64x1_t __a)
17386 {
17387 return __a < 0 ? -1ll : 0ll;
17388 }
17389
17390 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17391 vcltzd_f64 (float64_t __a)
17392 {
17393 return __a < 0.0 ? -1ll : 0ll;
17394 }
17395
17396 /* vclz. */
17397
17398 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17399 vclz_s8 (int8x8_t __a)
17400 {
17401 return __builtin_aarch64_clzv8qi (__a);
17402 }
17403
17404 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17405 vclz_s16 (int16x4_t __a)
17406 {
17407 return __builtin_aarch64_clzv4hi (__a);
17408 }
17409
17410 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17411 vclz_s32 (int32x2_t __a)
17412 {
17413 return __builtin_aarch64_clzv2si (__a);
17414 }
17415
17416 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17417 vclz_u8 (uint8x8_t __a)
17418 {
17419 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
17420 }
17421
17422 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17423 vclz_u16 (uint16x4_t __a)
17424 {
17425 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
17426 }
17427
17428 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17429 vclz_u32 (uint32x2_t __a)
17430 {
17431 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
17432 }
17433
17434 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17435 vclzq_s8 (int8x16_t __a)
17436 {
17437 return __builtin_aarch64_clzv16qi (__a);
17438 }
17439
17440 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17441 vclzq_s16 (int16x8_t __a)
17442 {
17443 return __builtin_aarch64_clzv8hi (__a);
17444 }
17445
17446 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17447 vclzq_s32 (int32x4_t __a)
17448 {
17449 return __builtin_aarch64_clzv4si (__a);
17450 }
17451
17452 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17453 vclzq_u8 (uint8x16_t __a)
17454 {
17455 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
17456 }
17457
17458 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17459 vclzq_u16 (uint16x8_t __a)
17460 {
17461 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
17462 }
17463
17464 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17465 vclzq_u32 (uint32x4_t __a)
17466 {
17467 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
17468 }
17469
17470 /* vcvt (double -> float). */
17471
17472 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17473 vcvt_f32_f64 (float64x2_t __a)
17474 {
17475 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
17476 }
17477
17478 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17479 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
17480 {
17481 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
17482 }
17483
17484 /* vcvt (float -> double). */
17485
17486 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17487 vcvt_f64_f32 (float32x2_t __a)
17488 {
17489
17490 return __builtin_aarch64_float_extend_lo_v2df (__a);
17491 }
17492
17493 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17494 vcvt_high_f64_f32 (float32x4_t __a)
17495 {
17496 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
17497 }
17498
17499 /* vcvt (<u>int -> float) */
17500
17501 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17502 vcvtd_f64_s64 (int64_t __a)
17503 {
17504 return (float64_t) __a;
17505 }
17506
17507 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17508 vcvtd_f64_u64 (uint64_t __a)
17509 {
17510 return (float64_t) __a;
17511 }
17512
17513 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17514 vcvts_f32_s32 (int32_t __a)
17515 {
17516 return (float32_t) __a;
17517 }
17518
17519 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17520 vcvts_f32_u32 (uint32_t __a)
17521 {
17522 return (float32_t) __a;
17523 }
17524
17525 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17526 vcvt_f32_s32 (int32x2_t __a)
17527 {
17528 return __builtin_aarch64_floatv2siv2sf (__a);
17529 }
17530
17531 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17532 vcvt_f32_u32 (uint32x2_t __a)
17533 {
17534 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
17535 }
17536
17537 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17538 vcvtq_f32_s32 (int32x4_t __a)
17539 {
17540 return __builtin_aarch64_floatv4siv4sf (__a);
17541 }
17542
17543 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17544 vcvtq_f32_u32 (uint32x4_t __a)
17545 {
17546 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
17547 }
17548
17549 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17550 vcvtq_f64_s64 (int64x2_t __a)
17551 {
17552 return __builtin_aarch64_floatv2div2df (__a);
17553 }
17554
17555 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17556 vcvtq_f64_u64 (uint64x2_t __a)
17557 {
17558 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
17559 }
17560
17561 /* vcvt (float -> <u>int) */
17562
17563 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17564 vcvtd_s64_f64 (float64_t __a)
17565 {
17566 return (int64_t) __a;
17567 }
17568
17569 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17570 vcvtd_u64_f64 (float64_t __a)
17571 {
17572 return (uint64_t) __a;
17573 }
17574
17575 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17576 vcvts_s32_f32 (float32_t __a)
17577 {
17578 return (int32_t) __a;
17579 }
17580
17581 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17582 vcvts_u32_f32 (float32_t __a)
17583 {
17584 return (uint32_t) __a;
17585 }
17586
17587 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17588 vcvt_s32_f32 (float32x2_t __a)
17589 {
17590 return __builtin_aarch64_lbtruncv2sfv2si (__a);
17591 }
17592
17593 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17594 vcvt_u32_f32 (float32x2_t __a)
17595 {
17596 /* TODO: This cast should go away when builtins have
17597 their correct types. */
17598 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
17599 }
17600
17601 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17602 vcvtq_s32_f32 (float32x4_t __a)
17603 {
17604 return __builtin_aarch64_lbtruncv4sfv4si (__a);
17605 }
17606
17607 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17608 vcvtq_u32_f32 (float32x4_t __a)
17609 {
17610 /* TODO: This cast should go away when builtins have
17611 their correct types. */
17612 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
17613 }
17614
17615 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17616 vcvtq_s64_f64 (float64x2_t __a)
17617 {
17618 return __builtin_aarch64_lbtruncv2dfv2di (__a);
17619 }
17620
17621 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17622 vcvtq_u64_f64 (float64x2_t __a)
17623 {
17624 /* TODO: This cast should go away when builtins have
17625 their correct types. */
17626 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
17627 }
17628
17629 /* vcvta */
17630
17631 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17632 vcvtad_s64_f64 (float64_t __a)
17633 {
17634 return __builtin_aarch64_lrounddfdi (__a);
17635 }
17636
17637 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17638 vcvtad_u64_f64 (float64_t __a)
17639 {
17640 return __builtin_aarch64_lroundudfdi (__a);
17641 }
17642
17643 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17644 vcvtas_s32_f32 (float32_t __a)
17645 {
17646 return __builtin_aarch64_lroundsfsi (__a);
17647 }
17648
17649 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17650 vcvtas_u32_f32 (float32_t __a)
17651 {
17652 return __builtin_aarch64_lroundusfsi (__a);
17653 }
17654
17655 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17656 vcvta_s32_f32 (float32x2_t __a)
17657 {
17658 return __builtin_aarch64_lroundv2sfv2si (__a);
17659 }
17660
17661 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17662 vcvta_u32_f32 (float32x2_t __a)
17663 {
17664 /* TODO: This cast should go away when builtins have
17665 their correct types. */
17666 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
17667 }
17668
17669 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17670 vcvtaq_s32_f32 (float32x4_t __a)
17671 {
17672 return __builtin_aarch64_lroundv4sfv4si (__a);
17673 }
17674
17675 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17676 vcvtaq_u32_f32 (float32x4_t __a)
17677 {
17678 /* TODO: This cast should go away when builtins have
17679 their correct types. */
17680 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
17681 }
17682
17683 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17684 vcvtaq_s64_f64 (float64x2_t __a)
17685 {
17686 return __builtin_aarch64_lroundv2dfv2di (__a);
17687 }
17688
17689 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17690 vcvtaq_u64_f64 (float64x2_t __a)
17691 {
17692 /* TODO: This cast should go away when builtins have
17693 their correct types. */
17694 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
17695 }
17696
17697 /* vcvtm */
17698
17699 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17700 vcvtmd_s64_f64 (float64_t __a)
17701 {
17702 return __builtin_lfloor (__a);
17703 }
17704
17705 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17706 vcvtmd_u64_f64 (float64_t __a)
17707 {
17708 return __builtin_aarch64_lfloorudfdi (__a);
17709 }
17710
17711 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17712 vcvtms_s32_f32 (float32_t __a)
17713 {
17714 return __builtin_ifloorf (__a);
17715 }
17716
17717 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17718 vcvtms_u32_f32 (float32_t __a)
17719 {
17720 return __builtin_aarch64_lfloorusfsi (__a);
17721 }
17722
17723 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17724 vcvtm_s32_f32 (float32x2_t __a)
17725 {
17726 return __builtin_aarch64_lfloorv2sfv2si (__a);
17727 }
17728
17729 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17730 vcvtm_u32_f32 (float32x2_t __a)
17731 {
17732 /* TODO: This cast should go away when builtins have
17733 their correct types. */
17734 return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
17735 }
17736
17737 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17738 vcvtmq_s32_f32 (float32x4_t __a)
17739 {
17740 return __builtin_aarch64_lfloorv4sfv4si (__a);
17741 }
17742
17743 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17744 vcvtmq_u32_f32 (float32x4_t __a)
17745 {
17746 /* TODO: This cast should go away when builtins have
17747 their correct types. */
17748 return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
17749 }
17750
17751 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17752 vcvtmq_s64_f64 (float64x2_t __a)
17753 {
17754 return __builtin_aarch64_lfloorv2dfv2di (__a);
17755 }
17756
17757 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17758 vcvtmq_u64_f64 (float64x2_t __a)
17759 {
17760 /* TODO: This cast should go away when builtins have
17761 their correct types. */
17762 return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
17763 }
17764
17765 /* vcvtn */
17766
17767 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17768 vcvtnd_s64_f64 (float64_t __a)
17769 {
17770 return __builtin_aarch64_lfrintndfdi (__a);
17771 }
17772
17773 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17774 vcvtnd_u64_f64 (float64_t __a)
17775 {
17776 return __builtin_aarch64_lfrintnudfdi (__a);
17777 }
17778
17779 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17780 vcvtns_s32_f32 (float32_t __a)
17781 {
17782 return __builtin_aarch64_lfrintnsfsi (__a);
17783 }
17784
17785 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17786 vcvtns_u32_f32 (float32_t __a)
17787 {
17788 return __builtin_aarch64_lfrintnusfsi (__a);
17789 }
17790
17791 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17792 vcvtn_s32_f32 (float32x2_t __a)
17793 {
17794 return __builtin_aarch64_lfrintnv2sfv2si (__a);
17795 }
17796
17797 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17798 vcvtn_u32_f32 (float32x2_t __a)
17799 {
17800 /* TODO: This cast should go away when builtins have
17801 their correct types. */
17802 return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
17803 }
17804
17805 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17806 vcvtnq_s32_f32 (float32x4_t __a)
17807 {
17808 return __builtin_aarch64_lfrintnv4sfv4si (__a);
17809 }
17810
17811 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17812 vcvtnq_u32_f32 (float32x4_t __a)
17813 {
17814 /* TODO: This cast should go away when builtins have
17815 their correct types. */
17816 return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
17817 }
17818
17819 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17820 vcvtnq_s64_f64 (float64x2_t __a)
17821 {
17822 return __builtin_aarch64_lfrintnv2dfv2di (__a);
17823 }
17824
17825 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17826 vcvtnq_u64_f64 (float64x2_t __a)
17827 {
17828 /* TODO: This cast should go away when builtins have
17829 their correct types. */
17830 return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
17831 }
17832
17833 /* vcvtp */
17834
17835 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
17836 vcvtpd_s64_f64 (float64_t __a)
17837 {
17838 return __builtin_lceil (__a);
17839 }
17840
17841 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17842 vcvtpd_u64_f64 (float64_t __a)
17843 {
17844 return __builtin_aarch64_lceiludfdi (__a);
17845 }
17846
17847 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17848 vcvtps_s32_f32 (float32_t __a)
17849 {
17850 return __builtin_iceilf (__a);
17851 }
17852
17853 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17854 vcvtps_u32_f32 (float32_t __a)
17855 {
17856 return __builtin_aarch64_lceilusfsi (__a);
17857 }
17858
17859 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17860 vcvtp_s32_f32 (float32x2_t __a)
17861 {
17862 return __builtin_aarch64_lceilv2sfv2si (__a);
17863 }
17864
17865 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17866 vcvtp_u32_f32 (float32x2_t __a)
17867 {
17868 /* TODO: This cast should go away when builtins have
17869 their correct types. */
17870 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
17871 }
17872
17873 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17874 vcvtpq_s32_f32 (float32x4_t __a)
17875 {
17876 return __builtin_aarch64_lceilv4sfv4si (__a);
17877 }
17878
17879 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17880 vcvtpq_u32_f32 (float32x4_t __a)
17881 {
17882 /* TODO: This cast should go away when builtins have
17883 their correct types. */
17884 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
17885 }
17886
17887 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17888 vcvtpq_s64_f64 (float64x2_t __a)
17889 {
17890 return __builtin_aarch64_lceilv2dfv2di (__a);
17891 }
17892
17893 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17894 vcvtpq_u64_f64 (float64x2_t __a)
17895 {
17896 /* TODO: This cast should go away when builtins have
17897 their correct types. */
17898 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
17899 }
17900
17901 /* vdup_n */
17902
17903 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17904 vdup_n_f32 (float32_t __a)
17905 {
17906 return (float32x2_t) {__a, __a};
17907 }
17908
17909 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17910 vdup_n_f64 (float64_t __a)
17911 {
17912 return __a;
17913 }
17914
17915 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17916 vdup_n_p8 (poly8_t __a)
17917 {
17918 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17919 }
17920
17921 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17922 vdup_n_p16 (poly16_t __a)
17923 {
17924 return (poly16x4_t) {__a, __a, __a, __a};
17925 }
17926
17927 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17928 vdup_n_s8 (int8_t __a)
17929 {
17930 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17931 }
17932
17933 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17934 vdup_n_s16 (int16_t __a)
17935 {
17936 return (int16x4_t) {__a, __a, __a, __a};
17937 }
17938
17939 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17940 vdup_n_s32 (int32_t __a)
17941 {
17942 return (int32x2_t) {__a, __a};
17943 }
17944
17945 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
17946 vdup_n_s64 (int64_t __a)
17947 {
17948 return __a;
17949 }
17950
17951 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17952 vdup_n_u8 (uint8_t __a)
17953 {
17954 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17955 }
17956
17957 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17958 vdup_n_u16 (uint16_t __a)
17959 {
17960 return (uint16x4_t) {__a, __a, __a, __a};
17961 }
17962
17963 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17964 vdup_n_u32 (uint32_t __a)
17965 {
17966 return (uint32x2_t) {__a, __a};
17967 }
17968
17969 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17970 vdup_n_u64 (uint64_t __a)
17971 {
17972 return __a;
17973 }
17974
17975 /* vdupq_n */
17976
17977 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17978 vdupq_n_f32 (float32_t __a)
17979 {
17980 return (float32x4_t) {__a, __a, __a, __a};
17981 }
17982
17983 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17984 vdupq_n_f64 (float64_t __a)
17985 {
17986 return (float64x2_t) {__a, __a};
17987 }
17988
17989 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17990 vdupq_n_p8 (uint32_t __a)
17991 {
17992 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
17993 __a, __a, __a, __a, __a, __a, __a, __a};
17994 }
17995
17996 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17997 vdupq_n_p16 (uint32_t __a)
17998 {
17999 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18000 }
18001
18002 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18003 vdupq_n_s8 (int32_t __a)
18004 {
18005 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
18006 __a, __a, __a, __a, __a, __a, __a, __a};
18007 }
18008
18009 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18010 vdupq_n_s16 (int32_t __a)
18011 {
18012 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18013 }
18014
18015 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18016 vdupq_n_s32 (int32_t __a)
18017 {
18018 return (int32x4_t) {__a, __a, __a, __a};
18019 }
18020
18021 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18022 vdupq_n_s64 (int64_t __a)
18023 {
18024 return (int64x2_t) {__a, __a};
18025 }
18026
18027 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18028 vdupq_n_u8 (uint32_t __a)
18029 {
18030 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
18031 __a, __a, __a, __a, __a, __a, __a, __a};
18032 }
18033
18034 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18035 vdupq_n_u16 (uint32_t __a)
18036 {
18037 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18038 }
18039
18040 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18041 vdupq_n_u32 (uint32_t __a)
18042 {
18043 return (uint32x4_t) {__a, __a, __a, __a};
18044 }
18045
18046 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18047 vdupq_n_u64 (uint64_t __a)
18048 {
18049 return (uint64x2_t) {__a, __a};
18050 }
18051
18052 /* vdup_lane */
18053
18054 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18055 vdup_lane_f32 (float32x2_t __a, const int __b)
18056 {
18057 return __aarch64_vdup_lane_f32 (__a, __b);
18058 }
18059
18060 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18061 vdup_lane_f64 (float64x1_t __a, const int __b)
18062 {
18063 return __aarch64_vdup_lane_f64 (__a, __b);
18064 }
18065
18066 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18067 vdup_lane_p8 (poly8x8_t __a, const int __b)
18068 {
18069 return __aarch64_vdup_lane_p8 (__a, __b);
18070 }
18071
18072 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18073 vdup_lane_p16 (poly16x4_t __a, const int __b)
18074 {
18075 return __aarch64_vdup_lane_p16 (__a, __b);
18076 }
18077
18078 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18079 vdup_lane_s8 (int8x8_t __a, const int __b)
18080 {
18081 return __aarch64_vdup_lane_s8 (__a, __b);
18082 }
18083
18084 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18085 vdup_lane_s16 (int16x4_t __a, const int __b)
18086 {
18087 return __aarch64_vdup_lane_s16 (__a, __b);
18088 }
18089
18090 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18091 vdup_lane_s32 (int32x2_t __a, const int __b)
18092 {
18093 return __aarch64_vdup_lane_s32 (__a, __b);
18094 }
18095
18096 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18097 vdup_lane_s64 (int64x1_t __a, const int __b)
18098 {
18099 return __aarch64_vdup_lane_s64 (__a, __b);
18100 }
18101
18102 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18103 vdup_lane_u8 (uint8x8_t __a, const int __b)
18104 {
18105 return __aarch64_vdup_lane_u8 (__a, __b);
18106 }
18107
18108 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18109 vdup_lane_u16 (uint16x4_t __a, const int __b)
18110 {
18111 return __aarch64_vdup_lane_u16 (__a, __b);
18112 }
18113
18114 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18115 vdup_lane_u32 (uint32x2_t __a, const int __b)
18116 {
18117 return __aarch64_vdup_lane_u32 (__a, __b);
18118 }
18119
18120 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18121 vdup_lane_u64 (uint64x1_t __a, const int __b)
18122 {
18123 return __aarch64_vdup_lane_u64 (__a, __b);
18124 }
18125
18126 /* vdup_laneq */
18127
18128 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18129 vdup_laneq_f32 (float32x4_t __a, const int __b)
18130 {
18131 return __aarch64_vdup_laneq_f32 (__a, __b);
18132 }
18133
18134 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18135 vdup_laneq_f64 (float64x2_t __a, const int __b)
18136 {
18137 return __aarch64_vdup_laneq_f64 (__a, __b);
18138 }
18139
18140 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18141 vdup_laneq_p8 (poly8x16_t __a, const int __b)
18142 {
18143 return __aarch64_vdup_laneq_p8 (__a, __b);
18144 }
18145
18146 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18147 vdup_laneq_p16 (poly16x8_t __a, const int __b)
18148 {
18149 return __aarch64_vdup_laneq_p16 (__a, __b);
18150 }
18151
18152 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18153 vdup_laneq_s8 (int8x16_t __a, const int __b)
18154 {
18155 return __aarch64_vdup_laneq_s8 (__a, __b);
18156 }
18157
18158 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18159 vdup_laneq_s16 (int16x8_t __a, const int __b)
18160 {
18161 return __aarch64_vdup_laneq_s16 (__a, __b);
18162 }
18163
18164 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18165 vdup_laneq_s32 (int32x4_t __a, const int __b)
18166 {
18167 return __aarch64_vdup_laneq_s32 (__a, __b);
18168 }
18169
18170 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18171 vdup_laneq_s64 (int64x2_t __a, const int __b)
18172 {
18173 return __aarch64_vdup_laneq_s64 (__a, __b);
18174 }
18175
18176 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18177 vdup_laneq_u8 (uint8x16_t __a, const int __b)
18178 {
18179 return __aarch64_vdup_laneq_u8 (__a, __b);
18180 }
18181
18182 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18183 vdup_laneq_u16 (uint16x8_t __a, const int __b)
18184 {
18185 return __aarch64_vdup_laneq_u16 (__a, __b);
18186 }
18187
18188 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18189 vdup_laneq_u32 (uint32x4_t __a, const int __b)
18190 {
18191 return __aarch64_vdup_laneq_u32 (__a, __b);
18192 }
18193
18194 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18195 vdup_laneq_u64 (uint64x2_t __a, const int __b)
18196 {
18197 return __aarch64_vdup_laneq_u64 (__a, __b);
18198 }
18199
18200 /* vdupq_lane */
18201 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18202 vdupq_lane_f32 (float32x2_t __a, const int __b)
18203 {
18204 return __aarch64_vdupq_lane_f32 (__a, __b);
18205 }
18206
18207 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18208 vdupq_lane_f64 (float64x1_t __a, const int __b)
18209 {
18210 return __aarch64_vdupq_lane_f64 (__a, __b);
18211 }
18212
18213 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18214 vdupq_lane_p8 (poly8x8_t __a, const int __b)
18215 {
18216 return __aarch64_vdupq_lane_p8 (__a, __b);
18217 }
18218
18219 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18220 vdupq_lane_p16 (poly16x4_t __a, const int __b)
18221 {
18222 return __aarch64_vdupq_lane_p16 (__a, __b);
18223 }
18224
18225 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18226 vdupq_lane_s8 (int8x8_t __a, const int __b)
18227 {
18228 return __aarch64_vdupq_lane_s8 (__a, __b);
18229 }
18230
18231 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18232 vdupq_lane_s16 (int16x4_t __a, const int __b)
18233 {
18234 return __aarch64_vdupq_lane_s16 (__a, __b);
18235 }
18236
18237 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18238 vdupq_lane_s32 (int32x2_t __a, const int __b)
18239 {
18240 return __aarch64_vdupq_lane_s32 (__a, __b);
18241 }
18242
18243 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18244 vdupq_lane_s64 (int64x1_t __a, const int __b)
18245 {
18246 return __aarch64_vdupq_lane_s64 (__a, __b);
18247 }
18248
18249 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18250 vdupq_lane_u8 (uint8x8_t __a, const int __b)
18251 {
18252 return __aarch64_vdupq_lane_u8 (__a, __b);
18253 }
18254
18255 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18256 vdupq_lane_u16 (uint16x4_t __a, const int __b)
18257 {
18258 return __aarch64_vdupq_lane_u16 (__a, __b);
18259 }
18260
18261 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18262 vdupq_lane_u32 (uint32x2_t __a, const int __b)
18263 {
18264 return __aarch64_vdupq_lane_u32 (__a, __b);
18265 }
18266
18267 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18268 vdupq_lane_u64 (uint64x1_t __a, const int __b)
18269 {
18270 return __aarch64_vdupq_lane_u64 (__a, __b);
18271 }
18272
18273 /* vdupq_laneq */
18274 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18275 vdupq_laneq_f32 (float32x4_t __a, const int __b)
18276 {
18277 return __aarch64_vdupq_laneq_f32 (__a, __b);
18278 }
18279
18280 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18281 vdupq_laneq_f64 (float64x2_t __a, const int __b)
18282 {
18283 return __aarch64_vdupq_laneq_f64 (__a, __b);
18284 }
18285
18286 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18287 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
18288 {
18289 return __aarch64_vdupq_laneq_p8 (__a, __b);
18290 }
18291
18292 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18293 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
18294 {
18295 return __aarch64_vdupq_laneq_p16 (__a, __b);
18296 }
18297
18298 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18299 vdupq_laneq_s8 (int8x16_t __a, const int __b)
18300 {
18301 return __aarch64_vdupq_laneq_s8 (__a, __b);
18302 }
18303
18304 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18305 vdupq_laneq_s16 (int16x8_t __a, const int __b)
18306 {
18307 return __aarch64_vdupq_laneq_s16 (__a, __b);
18308 }
18309
18310 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18311 vdupq_laneq_s32 (int32x4_t __a, const int __b)
18312 {
18313 return __aarch64_vdupq_laneq_s32 (__a, __b);
18314 }
18315
18316 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18317 vdupq_laneq_s64 (int64x2_t __a, const int __b)
18318 {
18319 return __aarch64_vdupq_laneq_s64 (__a, __b);
18320 }
18321
18322 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18323 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
18324 {
18325 return __aarch64_vdupq_laneq_u8 (__a, __b);
18326 }
18327
18328 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18329 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
18330 {
18331 return __aarch64_vdupq_laneq_u16 (__a, __b);
18332 }
18333
18334 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18335 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
18336 {
18337 return __aarch64_vdupq_laneq_u32 (__a, __b);
18338 }
18339
18340 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18341 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
18342 {
18343 return __aarch64_vdupq_laneq_u64 (__a, __b);
18344 }
18345
18346 /* vdupb_lane */
18347 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
18348 vdupb_lane_p8 (poly8x8_t __a, const int __b)
18349 {
18350 return __aarch64_vget_lane_p8 (__a, __b);
18351 }
18352
18353 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18354 vdupb_lane_s8 (int8x8_t __a, const int __b)
18355 {
18356 return __aarch64_vget_lane_s8 (__a, __b);
18357 }
18358
18359 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18360 vdupb_lane_u8 (uint8x8_t __a, const int __b)
18361 {
18362 return __aarch64_vget_lane_u8 (__a, __b);
18363 }
18364
18365 /* vduph_lane */
18366 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
18367 vduph_lane_p16 (poly16x4_t __a, const int __b)
18368 {
18369 return __aarch64_vget_lane_p16 (__a, __b);
18370 }
18371
18372 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18373 vduph_lane_s16 (int16x4_t __a, const int __b)
18374 {
18375 return __aarch64_vget_lane_s16 (__a, __b);
18376 }
18377
18378 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18379 vduph_lane_u16 (uint16x4_t __a, const int __b)
18380 {
18381 return __aarch64_vget_lane_u16 (__a, __b);
18382 }
18383
18384 /* vdups_lane */
18385 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18386 vdups_lane_f32 (float32x2_t __a, const int __b)
18387 {
18388 return __aarch64_vget_lane_f32 (__a, __b);
18389 }
18390
18391 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18392 vdups_lane_s32 (int32x2_t __a, const int __b)
18393 {
18394 return __aarch64_vget_lane_s32 (__a, __b);
18395 }
18396
18397 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18398 vdups_lane_u32 (uint32x2_t __a, const int __b)
18399 {
18400 return __aarch64_vget_lane_u32 (__a, __b);
18401 }
18402
18403 /* vdupd_lane */
18404 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18405 vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b)
18406 {
18407 return __a;
18408 }
18409
18410 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18411 vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b)
18412 {
18413 return __a;
18414 }
18415
18416 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18417 vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b)
18418 {
18419 return __a;
18420 }
18421
18422 /* vdupb_laneq */
18423 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
18424 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
18425 {
18426 return __aarch64_vgetq_lane_p8 (__a, __b);
18427 }
18428
18429 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18430 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
18431 {
18432 return __aarch64_vgetq_lane_s8 (__a, __b);
18433 }
18434
18435 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18436 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
18437 {
18438 return __aarch64_vgetq_lane_u8 (__a, __b);
18439 }
18440
18441 /* vduph_laneq */
18442 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
18443 vduph_laneq_p16 (poly16x8_t __a, const int __b)
18444 {
18445 return __aarch64_vgetq_lane_p16 (__a, __b);
18446 }
18447
18448 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18449 vduph_laneq_s16 (int16x8_t __a, const int __b)
18450 {
18451 return __aarch64_vgetq_lane_s16 (__a, __b);
18452 }
18453
18454 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18455 vduph_laneq_u16 (uint16x8_t __a, const int __b)
18456 {
18457 return __aarch64_vgetq_lane_u16 (__a, __b);
18458 }
18459
18460 /* vdups_laneq */
18461 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18462 vdups_laneq_f32 (float32x4_t __a, const int __b)
18463 {
18464 return __aarch64_vgetq_lane_f32 (__a, __b);
18465 }
18466
18467 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18468 vdups_laneq_s32 (int32x4_t __a, const int __b)
18469 {
18470 return __aarch64_vgetq_lane_s32 (__a, __b);
18471 }
18472
18473 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18474 vdups_laneq_u32 (uint32x4_t __a, const int __b)
18475 {
18476 return __aarch64_vgetq_lane_u32 (__a, __b);
18477 }
18478
18479 /* vdupd_laneq */
18480 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18481 vdupd_laneq_f64 (float64x2_t __a, const int __b)
18482 {
18483 return __aarch64_vgetq_lane_f64 (__a, __b);
18484 }
18485
18486 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18487 vdupd_laneq_s64 (int64x2_t __a, const int __b)
18488 {
18489 return __aarch64_vgetq_lane_s64 (__a, __b);
18490 }
18491
18492 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18493 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
18494 {
18495 return __aarch64_vgetq_lane_u64 (__a, __b);
18496 }
18497
18498 /* vfma_lane */
18499
18500 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18501 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
18502 float32x2_t __c, const int __lane)
18503 {
18504 return __builtin_aarch64_fmav2sf (__b,
18505 __aarch64_vdup_lane_f32 (__c, __lane),
18506 __a);
18507 }
18508
18509 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18510 vfma_lane_f64 (float64_t __a, float64_t __b,
18511 float64_t __c, const int __lane)
18512 {
18513 return __builtin_fma (__b, __c, __a);
18514 }
18515
18516 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18517 vfmad_lane_f64 (float64_t __a, float64_t __b,
18518 float64_t __c, const int __lane)
18519 {
18520 return __builtin_fma (__b, __c, __a);
18521 }
18522
18523 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18524 vfmas_lane_f32 (float32_t __a, float32_t __b,
18525 float32x2_t __c, const int __lane)
18526 {
18527 return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
18528 }
18529
18530 /* vfma_laneq */
18531
18532 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18533 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
18534 float32x4_t __c, const int __lane)
18535 {
18536 return __builtin_aarch64_fmav2sf (__b,
18537 __aarch64_vdup_laneq_f32 (__c, __lane),
18538 __a);
18539 }
18540
18541 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18542 vfma_laneq_f64 (float64_t __a, float64_t __b,
18543 float64x2_t __c, const int __lane)
18544 {
18545 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18546 }
18547
18548 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18549 vfmad_laneq_f64 (float64_t __a, float64_t __b,
18550 float64x2_t __c, const int __lane)
18551 {
18552 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18553 }
18554
18555 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18556 vfmas_laneq_f32 (float32_t __a, float32_t __b,
18557 float32x4_t __c, const int __lane)
18558 {
18559 return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
18560 }
18561
18562 /* vfmaq_lane */
18563
18564 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18565 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18566 float32x2_t __c, const int __lane)
18567 {
18568 return __builtin_aarch64_fmav4sf (__b,
18569 __aarch64_vdupq_lane_f32 (__c, __lane),
18570 __a);
18571 }
18572
18573 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18574 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
18575 float64_t __c, const int __lane)
18576 {
18577 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
18578 }
18579
18580 /* vfmaq_laneq */
18581
18582 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18583 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18584 float32x4_t __c, const int __lane)
18585 {
18586 return __builtin_aarch64_fmav4sf (__b,
18587 __aarch64_vdupq_laneq_f32 (__c, __lane),
18588 __a);
18589 }
18590
18591 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18592 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
18593 float64x2_t __c, const int __lane)
18594 {
18595 return __builtin_aarch64_fmav2df (__b,
18596 __aarch64_vdupq_laneq_f64 (__c, __lane),
18597 __a);
18598 }
18599
18600 /* vfms_lane */
18601
18602 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18603 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
18604 float32x2_t __c, const int __lane)
18605 {
18606 return __builtin_aarch64_fmav2sf (-__b,
18607 __aarch64_vdup_lane_f32 (__c, __lane),
18608 __a);
18609 }
18610
18611 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18612 vfms_lane_f64 (float64_t __a, float64_t __b,
18613 float64_t __c, const int __lane)
18614 {
18615 return __builtin_fma (-__b, __c, __a);
18616 }
18617
18618 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18619 vfmsd_lane_f64 (float64_t __a, float64_t __b,
18620 float64_t __c, const int __lane)
18621 {
18622 return __builtin_fma (-__b, __c, __a);
18623 }
18624
18625 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18626 vfmss_lane_f32 (float32_t __a, float32_t __b,
18627 float32x2_t __c, const int __lane)
18628 {
18629 return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
18630 }
18631
18632 /* vfms_laneq */
18633
18634 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18635 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
18636 float32x4_t __c, const int __lane)
18637 {
18638 return __builtin_aarch64_fmav2sf (-__b,
18639 __aarch64_vdup_laneq_f32 (__c, __lane),
18640 __a);
18641 }
18642
18643 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18644 vfms_laneq_f64 (float64_t __a, float64_t __b,
18645 float64x2_t __c, const int __lane)
18646 {
18647 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18648 }
18649
18650 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18651 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
18652 float64x2_t __c, const int __lane)
18653 {
18654 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18655 }
18656
18657 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18658 vfmss_laneq_f32 (float32_t __a, float32_t __b,
18659 float32x4_t __c, const int __lane)
18660 {
18661 return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
18662 }
18663
18664 /* vfmsq_lane */
18665
18666 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18667 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18668 float32x2_t __c, const int __lane)
18669 {
18670 return __builtin_aarch64_fmav4sf (-__b,
18671 __aarch64_vdupq_lane_f32 (__c, __lane),
18672 __a);
18673 }
18674
18675 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18676 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
18677 float64_t __c, const int __lane)
18678 {
18679 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
18680 }
18681
18682 /* vfmsq_laneq */
18683
18684 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18685 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18686 float32x4_t __c, const int __lane)
18687 {
18688 return __builtin_aarch64_fmav4sf (-__b,
18689 __aarch64_vdupq_laneq_f32 (__c, __lane),
18690 __a);
18691 }
18692
18693 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18694 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
18695 float64x2_t __c, const int __lane)
18696 {
18697 return __builtin_aarch64_fmav2df (-__b,
18698 __aarch64_vdupq_laneq_f64 (__c, __lane),
18699 __a);
18700 }
18701
18702 /* vld1 */
18703
18704 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18705 vld1_f32 (const float32_t *a)
18706 {
18707 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
18708 }
18709
18710 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18711 vld1_f64 (const float64_t *a)
18712 {
18713 return *a;
18714 }
18715
18716 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18717 vld1_p8 (const poly8_t *a)
18718 {
18719 return (poly8x8_t)
18720 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18721 }
18722
18723 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18724 vld1_p16 (const poly16_t *a)
18725 {
18726 return (poly16x4_t)
18727 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18728 }
18729
18730 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18731 vld1_s8 (const int8_t *a)
18732 {
18733 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18734 }
18735
18736 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18737 vld1_s16 (const int16_t *a)
18738 {
18739 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18740 }
18741
18742 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18743 vld1_s32 (const int32_t *a)
18744 {
18745 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
18746 }
18747
18748 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18749 vld1_s64 (const int64_t *a)
18750 {
18751 return *a;
18752 }
18753
18754 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18755 vld1_u8 (const uint8_t *a)
18756 {
18757 return (uint8x8_t)
18758 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18759 }
18760
18761 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18762 vld1_u16 (const uint16_t *a)
18763 {
18764 return (uint16x4_t)
18765 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18766 }
18767
18768 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18769 vld1_u32 (const uint32_t *a)
18770 {
18771 return (uint32x2_t)
18772 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
18773 }
18774
18775 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18776 vld1_u64 (const uint64_t *a)
18777 {
18778 return *a;
18779 }
18780
18781 /* vld1q */
18782
18783 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18784 vld1q_f32 (const float32_t *a)
18785 {
18786 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
18787 }
18788
18789 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18790 vld1q_f64 (const float64_t *a)
18791 {
18792 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
18793 }
18794
18795 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18796 vld1q_p8 (const poly8_t *a)
18797 {
18798 return (poly8x16_t)
18799 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18800 }
18801
18802 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18803 vld1q_p16 (const poly16_t *a)
18804 {
18805 return (poly16x8_t)
18806 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18807 }
18808
18809 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18810 vld1q_s8 (const int8_t *a)
18811 {
18812 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18813 }
18814
18815 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18816 vld1q_s16 (const int16_t *a)
18817 {
18818 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18819 }
18820
18821 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18822 vld1q_s32 (const int32_t *a)
18823 {
18824 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
18825 }
18826
18827 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18828 vld1q_s64 (const int64_t *a)
18829 {
18830 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
18831 }
18832
18833 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18834 vld1q_u8 (const uint8_t *a)
18835 {
18836 return (uint8x16_t)
18837 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18838 }
18839
18840 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18841 vld1q_u16 (const uint16_t *a)
18842 {
18843 return (uint16x8_t)
18844 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18845 }
18846
18847 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18848 vld1q_u32 (const uint32_t *a)
18849 {
18850 return (uint32x4_t)
18851 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
18852 }
18853
18854 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18855 vld1q_u64 (const uint64_t *a)
18856 {
18857 return (uint64x2_t)
18858 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
18859 }
18860
18861 /* vldn */
18862
18863 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
18864 vld2_s64 (const int64_t * __a)
18865 {
18866 int64x1x2_t ret;
18867 __builtin_aarch64_simd_oi __o;
18868 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
18869 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
18870 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
18871 return ret;
18872 }
18873
18874 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
18875 vld2_u64 (const uint64_t * __a)
18876 {
18877 uint64x1x2_t ret;
18878 __builtin_aarch64_simd_oi __o;
18879 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
18880 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
18881 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
18882 return ret;
18883 }
18884
18885 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
18886 vld2_f64 (const float64_t * __a)
18887 {
18888 float64x1x2_t ret;
18889 __builtin_aarch64_simd_oi __o;
18890 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
18891 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
18892 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
18893 return ret;
18894 }
18895
18896 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
18897 vld2_s8 (const int8_t * __a)
18898 {
18899 int8x8x2_t ret;
18900 __builtin_aarch64_simd_oi __o;
18901 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
18902 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
18903 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
18904 return ret;
18905 }
18906
18907 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
18908 vld2_p8 (const poly8_t * __a)
18909 {
18910 poly8x8x2_t ret;
18911 __builtin_aarch64_simd_oi __o;
18912 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
18913 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
18914 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
18915 return ret;
18916 }
18917
18918 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
18919 vld2_s16 (const int16_t * __a)
18920 {
18921 int16x4x2_t ret;
18922 __builtin_aarch64_simd_oi __o;
18923 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
18924 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
18925 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
18926 return ret;
18927 }
18928
18929 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
18930 vld2_p16 (const poly16_t * __a)
18931 {
18932 poly16x4x2_t ret;
18933 __builtin_aarch64_simd_oi __o;
18934 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
18935 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
18936 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
18937 return ret;
18938 }
18939
18940 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
18941 vld2_s32 (const int32_t * __a)
18942 {
18943 int32x2x2_t ret;
18944 __builtin_aarch64_simd_oi __o;
18945 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
18946 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
18947 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
18948 return ret;
18949 }
18950
18951 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
18952 vld2_u8 (const uint8_t * __a)
18953 {
18954 uint8x8x2_t ret;
18955 __builtin_aarch64_simd_oi __o;
18956 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
18957 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
18958 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
18959 return ret;
18960 }
18961
18962 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
18963 vld2_u16 (const uint16_t * __a)
18964 {
18965 uint16x4x2_t ret;
18966 __builtin_aarch64_simd_oi __o;
18967 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
18968 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
18969 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
18970 return ret;
18971 }
18972
18973 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
18974 vld2_u32 (const uint32_t * __a)
18975 {
18976 uint32x2x2_t ret;
18977 __builtin_aarch64_simd_oi __o;
18978 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
18979 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
18980 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
18981 return ret;
18982 }
18983
18984 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
18985 vld2_f32 (const float32_t * __a)
18986 {
18987 float32x2x2_t ret;
18988 __builtin_aarch64_simd_oi __o;
18989 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
18990 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
18991 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
18992 return ret;
18993 }
18994
18995 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
18996 vld2q_s8 (const int8_t * __a)
18997 {
18998 int8x16x2_t ret;
18999 __builtin_aarch64_simd_oi __o;
19000 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19001 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19002 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19003 return ret;
19004 }
19005
19006 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
19007 vld2q_p8 (const poly8_t * __a)
19008 {
19009 poly8x16x2_t ret;
19010 __builtin_aarch64_simd_oi __o;
19011 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19012 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19013 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19014 return ret;
19015 }
19016
19017 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
19018 vld2q_s16 (const int16_t * __a)
19019 {
19020 int16x8x2_t ret;
19021 __builtin_aarch64_simd_oi __o;
19022 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19023 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19024 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19025 return ret;
19026 }
19027
19028 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
19029 vld2q_p16 (const poly16_t * __a)
19030 {
19031 poly16x8x2_t ret;
19032 __builtin_aarch64_simd_oi __o;
19033 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19034 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19035 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19036 return ret;
19037 }
19038
19039 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
19040 vld2q_s32 (const int32_t * __a)
19041 {
19042 int32x4x2_t ret;
19043 __builtin_aarch64_simd_oi __o;
19044 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
19045 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
19046 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
19047 return ret;
19048 }
19049
19050 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
19051 vld2q_s64 (const int64_t * __a)
19052 {
19053 int64x2x2_t ret;
19054 __builtin_aarch64_simd_oi __o;
19055 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
19056 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
19057 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
19058 return ret;
19059 }
19060
19061 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
19062 vld2q_u8 (const uint8_t * __a)
19063 {
19064 uint8x16x2_t ret;
19065 __builtin_aarch64_simd_oi __o;
19066 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19067 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19068 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19069 return ret;
19070 }
19071
19072 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
19073 vld2q_u16 (const uint16_t * __a)
19074 {
19075 uint16x8x2_t ret;
19076 __builtin_aarch64_simd_oi __o;
19077 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19078 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19079 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19080 return ret;
19081 }
19082
19083 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
19084 vld2q_u32 (const uint32_t * __a)
19085 {
19086 uint32x4x2_t ret;
19087 __builtin_aarch64_simd_oi __o;
19088 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
19089 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
19090 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
19091 return ret;
19092 }
19093
19094 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
19095 vld2q_u64 (const uint64_t * __a)
19096 {
19097 uint64x2x2_t ret;
19098 __builtin_aarch64_simd_oi __o;
19099 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
19100 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
19101 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
19102 return ret;
19103 }
19104
19105 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
19106 vld2q_f32 (const float32_t * __a)
19107 {
19108 float32x4x2_t ret;
19109 __builtin_aarch64_simd_oi __o;
19110 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
19111 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
19112 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
19113 return ret;
19114 }
19115
19116 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
19117 vld2q_f64 (const float64_t * __a)
19118 {
19119 float64x2x2_t ret;
19120 __builtin_aarch64_simd_oi __o;
19121 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
19122 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
19123 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
19124 return ret;
19125 }
19126
19127 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
19128 vld3_s64 (const int64_t * __a)
19129 {
19130 int64x1x3_t ret;
19131 __builtin_aarch64_simd_ci __o;
19132 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
19133 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
19134 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
19135 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
19136 return ret;
19137 }
19138
19139 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
19140 vld3_u64 (const uint64_t * __a)
19141 {
19142 uint64x1x3_t ret;
19143 __builtin_aarch64_simd_ci __o;
19144 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
19145 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
19146 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
19147 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
19148 return ret;
19149 }
19150
19151 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
19152 vld3_f64 (const float64_t * __a)
19153 {
19154 float64x1x3_t ret;
19155 __builtin_aarch64_simd_ci __o;
19156 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
19157 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
19158 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
19159 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
19160 return ret;
19161 }
19162
19163 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
19164 vld3_s8 (const int8_t * __a)
19165 {
19166 int8x8x3_t ret;
19167 __builtin_aarch64_simd_ci __o;
19168 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19169 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19170 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19171 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19172 return ret;
19173 }
19174
19175 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
19176 vld3_p8 (const poly8_t * __a)
19177 {
19178 poly8x8x3_t ret;
19179 __builtin_aarch64_simd_ci __o;
19180 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19181 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19182 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19183 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19184 return ret;
19185 }
19186
19187 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
19188 vld3_s16 (const int16_t * __a)
19189 {
19190 int16x4x3_t ret;
19191 __builtin_aarch64_simd_ci __o;
19192 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19193 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19194 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19195 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19196 return ret;
19197 }
19198
19199 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
19200 vld3_p16 (const poly16_t * __a)
19201 {
19202 poly16x4x3_t ret;
19203 __builtin_aarch64_simd_ci __o;
19204 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19205 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19206 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19207 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19208 return ret;
19209 }
19210
19211 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
19212 vld3_s32 (const int32_t * __a)
19213 {
19214 int32x2x3_t ret;
19215 __builtin_aarch64_simd_ci __o;
19216 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
19217 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
19218 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
19219 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
19220 return ret;
19221 }
19222
19223 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
19224 vld3_u8 (const uint8_t * __a)
19225 {
19226 uint8x8x3_t ret;
19227 __builtin_aarch64_simd_ci __o;
19228 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19229 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19230 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19231 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19232 return ret;
19233 }
19234
19235 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
19236 vld3_u16 (const uint16_t * __a)
19237 {
19238 uint16x4x3_t ret;
19239 __builtin_aarch64_simd_ci __o;
19240 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19241 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19242 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19243 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19244 return ret;
19245 }
19246
19247 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
19248 vld3_u32 (const uint32_t * __a)
19249 {
19250 uint32x2x3_t ret;
19251 __builtin_aarch64_simd_ci __o;
19252 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
19253 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
19254 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
19255 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
19256 return ret;
19257 }
19258
19259 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
19260 vld3_f32 (const float32_t * __a)
19261 {
19262 float32x2x3_t ret;
19263 __builtin_aarch64_simd_ci __o;
19264 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
19265 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
19266 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
19267 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
19268 return ret;
19269 }
19270
19271 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
19272 vld3q_s8 (const int8_t * __a)
19273 {
19274 int8x16x3_t ret;
19275 __builtin_aarch64_simd_ci __o;
19276 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
19277 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
19278 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
19279 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
19280 return ret;
19281 }
19282
19283 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
19284 vld3q_p8 (const poly8_t * __a)
19285 {
19286 poly8x16x3_t ret;
19287 __builtin_aarch64_simd_ci __o;
19288 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
19289 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
19290 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
19291 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
19292 return ret;
19293 }
19294
19295 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
19296 vld3q_s16 (const int16_t * __a)
19297 {
19298 int16x8x3_t ret;
19299 __builtin_aarch64_simd_ci __o;
19300 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
19301 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
19302 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
19303 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
19304 return ret;
19305 }
19306
19307 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
19308 vld3q_p16 (const poly16_t * __a)
19309 {
19310 poly16x8x3_t ret;
19311 __builtin_aarch64_simd_ci __o;
19312 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
19313 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
19314 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
19315 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
19316 return ret;
19317 }
19318
19319 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
19320 vld3q_s32 (const int32_t * __a)
19321 {
19322 int32x4x3_t ret;
19323 __builtin_aarch64_simd_ci __o;
19324 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
19325 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
19326 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
19327 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
19328 return ret;
19329 }
19330
19331 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
19332 vld3q_s64 (const int64_t * __a)
19333 {
19334 int64x2x3_t ret;
19335 __builtin_aarch64_simd_ci __o;
19336 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
19337 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
19338 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
19339 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
19340 return ret;
19341 }
19342
19343 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
19344 vld3q_u8 (const uint8_t * __a)
19345 {
19346 uint8x16x3_t ret;
19347 __builtin_aarch64_simd_ci __o;
19348 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
19349 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
19350 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
19351 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
19352 return ret;
19353 }
19354
19355 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
19356 vld3q_u16 (const uint16_t * __a)
19357 {
19358 uint16x8x3_t ret;
19359 __builtin_aarch64_simd_ci __o;
19360 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
19361 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
19362 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
19363 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
19364 return ret;
19365 }
19366
19367 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
19368 vld3q_u32 (const uint32_t * __a)
19369 {
19370 uint32x4x3_t ret;
19371 __builtin_aarch64_simd_ci __o;
19372 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
19373 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
19374 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
19375 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
19376 return ret;
19377 }
19378
19379 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
19380 vld3q_u64 (const uint64_t * __a)
19381 {
19382 uint64x2x3_t ret;
19383 __builtin_aarch64_simd_ci __o;
19384 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
19385 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
19386 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
19387 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
19388 return ret;
19389 }
19390
19391 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
19392 vld3q_f32 (const float32_t * __a)
19393 {
19394 float32x4x3_t ret;
19395 __builtin_aarch64_simd_ci __o;
19396 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
19397 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
19398 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
19399 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
19400 return ret;
19401 }
19402
19403 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
19404 vld3q_f64 (const float64_t * __a)
19405 {
19406 float64x2x3_t ret;
19407 __builtin_aarch64_simd_ci __o;
19408 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
19409 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
19410 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
19411 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
19412 return ret;
19413 }
19414
19415 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
19416 vld4_s64 (const int64_t * __a)
19417 {
19418 int64x1x4_t ret;
19419 __builtin_aarch64_simd_xi __o;
19420 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
19421 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
19422 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
19423 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
19424 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
19425 return ret;
19426 }
19427
19428 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
19429 vld4_u64 (const uint64_t * __a)
19430 {
19431 uint64x1x4_t ret;
19432 __builtin_aarch64_simd_xi __o;
19433 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
19434 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
19435 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
19436 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
19437 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
19438 return ret;
19439 }
19440
19441 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
19442 vld4_f64 (const float64_t * __a)
19443 {
19444 float64x1x4_t ret;
19445 __builtin_aarch64_simd_xi __o;
19446 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
19447 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
19448 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
19449 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
19450 ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
19451 return ret;
19452 }
19453
19454 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
19455 vld4_s8 (const int8_t * __a)
19456 {
19457 int8x8x4_t ret;
19458 __builtin_aarch64_simd_xi __o;
19459 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
19460 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
19461 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
19462 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
19463 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
19464 return ret;
19465 }
19466
19467 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
19468 vld4_p8 (const poly8_t * __a)
19469 {
19470 poly8x8x4_t ret;
19471 __builtin_aarch64_simd_xi __o;
19472 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
19473 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
19474 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
19475 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
19476 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
19477 return ret;
19478 }
19479
19480 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
19481 vld4_s16 (const int16_t * __a)
19482 {
19483 int16x4x4_t ret;
19484 __builtin_aarch64_simd_xi __o;
19485 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
19486 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
19487 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
19488 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
19489 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
19490 return ret;
19491 }
19492
19493 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
19494 vld4_p16 (const poly16_t * __a)
19495 {
19496 poly16x4x4_t ret;
19497 __builtin_aarch64_simd_xi __o;
19498 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
19499 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
19500 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
19501 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
19502 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
19503 return ret;
19504 }
19505
19506 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
19507 vld4_s32 (const int32_t * __a)
19508 {
19509 int32x2x4_t ret;
19510 __builtin_aarch64_simd_xi __o;
19511 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
19512 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
19513 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
19514 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
19515 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
19516 return ret;
19517 }
19518
19519 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
19520 vld4_u8 (const uint8_t * __a)
19521 {
19522 uint8x8x4_t ret;
19523 __builtin_aarch64_simd_xi __o;
19524 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
19525 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
19526 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
19527 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
19528 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
19529 return ret;
19530 }
19531
19532 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
19533 vld4_u16 (const uint16_t * __a)
19534 {
19535 uint16x4x4_t ret;
19536 __builtin_aarch64_simd_xi __o;
19537 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
19538 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
19539 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
19540 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
19541 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
19542 return ret;
19543 }
19544
19545 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
19546 vld4_u32 (const uint32_t * __a)
19547 {
19548 uint32x2x4_t ret;
19549 __builtin_aarch64_simd_xi __o;
19550 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
19551 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
19552 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
19553 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
19554 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
19555 return ret;
19556 }
19557
19558 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
19559 vld4_f32 (const float32_t * __a)
19560 {
19561 float32x2x4_t ret;
19562 __builtin_aarch64_simd_xi __o;
19563 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
19564 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
19565 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
19566 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
19567 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
19568 return ret;
19569 }
19570
19571 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
19572 vld4q_s8 (const int8_t * __a)
19573 {
19574 int8x16x4_t ret;
19575 __builtin_aarch64_simd_xi __o;
19576 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
19577 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
19578 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
19579 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
19580 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
19581 return ret;
19582 }
19583
19584 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
19585 vld4q_p8 (const poly8_t * __a)
19586 {
19587 poly8x16x4_t ret;
19588 __builtin_aarch64_simd_xi __o;
19589 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
19590 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
19591 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
19592 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
19593 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
19594 return ret;
19595 }
19596
19597 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
19598 vld4q_s16 (const int16_t * __a)
19599 {
19600 int16x8x4_t ret;
19601 __builtin_aarch64_simd_xi __o;
19602 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
19603 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
19604 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
19605 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
19606 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
19607 return ret;
19608 }
19609
19610 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
19611 vld4q_p16 (const poly16_t * __a)
19612 {
19613 poly16x8x4_t ret;
19614 __builtin_aarch64_simd_xi __o;
19615 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
19616 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
19617 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
19618 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
19619 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
19620 return ret;
19621 }
19622
19623 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
19624 vld4q_s32 (const int32_t * __a)
19625 {
19626 int32x4x4_t ret;
19627 __builtin_aarch64_simd_xi __o;
19628 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
19629 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
19630 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
19631 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
19632 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
19633 return ret;
19634 }
19635
19636 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
19637 vld4q_s64 (const int64_t * __a)
19638 {
19639 int64x2x4_t ret;
19640 __builtin_aarch64_simd_xi __o;
19641 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
19642 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
19643 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
19644 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
19645 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
19646 return ret;
19647 }
19648
19649 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
19650 vld4q_u8 (const uint8_t * __a)
19651 {
19652 uint8x16x4_t ret;
19653 __builtin_aarch64_simd_xi __o;
19654 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
19655 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
19656 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
19657 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
19658 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
19659 return ret;
19660 }
19661
19662 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
19663 vld4q_u16 (const uint16_t * __a)
19664 {
19665 uint16x8x4_t ret;
19666 __builtin_aarch64_simd_xi __o;
19667 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
19668 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
19669 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
19670 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
19671 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
19672 return ret;
19673 }
19674
19675 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
19676 vld4q_u32 (const uint32_t * __a)
19677 {
19678 uint32x4x4_t ret;
19679 __builtin_aarch64_simd_xi __o;
19680 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
19681 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
19682 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
19683 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
19684 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
19685 return ret;
19686 }
19687
19688 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
19689 vld4q_u64 (const uint64_t * __a)
19690 {
19691 uint64x2x4_t ret;
19692 __builtin_aarch64_simd_xi __o;
19693 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
19694 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
19695 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
19696 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
19697 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
19698 return ret;
19699 }
19700
19701 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
19702 vld4q_f32 (const float32_t * __a)
19703 {
19704 float32x4x4_t ret;
19705 __builtin_aarch64_simd_xi __o;
19706 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
19707 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
19708 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
19709 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
19710 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
19711 return ret;
19712 }
19713
19714 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
19715 vld4q_f64 (const float64_t * __a)
19716 {
19717 float64x2x4_t ret;
19718 __builtin_aarch64_simd_xi __o;
19719 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
19720 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
19721 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
19722 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
19723 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
19724 return ret;
19725 }
19726
19727 /* vmax */
19728
19729 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19730 vmax_f32 (float32x2_t __a, float32x2_t __b)
19731 {
19732 return __builtin_aarch64_smax_nanv2sf (__a, __b);
19733 }
19734
19735 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19736 vmax_s8 (int8x8_t __a, int8x8_t __b)
19737 {
19738 return __builtin_aarch64_smaxv8qi (__a, __b);
19739 }
19740
19741 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19742 vmax_s16 (int16x4_t __a, int16x4_t __b)
19743 {
19744 return __builtin_aarch64_smaxv4hi (__a, __b);
19745 }
19746
19747 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19748 vmax_s32 (int32x2_t __a, int32x2_t __b)
19749 {
19750 return __builtin_aarch64_smaxv2si (__a, __b);
19751 }
19752
19753 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19754 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
19755 {
19756 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
19757 (int8x8_t) __b);
19758 }
19759
19760 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19761 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
19762 {
19763 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
19764 (int16x4_t) __b);
19765 }
19766
19767 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19768 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
19769 {
19770 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
19771 (int32x2_t) __b);
19772 }
19773
19774 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19775 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
19776 {
19777 return __builtin_aarch64_smax_nanv4sf (__a, __b);
19778 }
19779
19780 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19781 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
19782 {
19783 return __builtin_aarch64_smax_nanv2df (__a, __b);
19784 }
19785
19786 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19787 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
19788 {
19789 return __builtin_aarch64_smaxv16qi (__a, __b);
19790 }
19791
19792 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19793 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
19794 {
19795 return __builtin_aarch64_smaxv8hi (__a, __b);
19796 }
19797
19798 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19799 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
19800 {
19801 return __builtin_aarch64_smaxv4si (__a, __b);
19802 }
19803
19804 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19805 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
19806 {
19807 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
19808 (int8x16_t) __b);
19809 }
19810
19811 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19812 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
19813 {
19814 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
19815 (int16x8_t) __b);
19816 }
19817
19818 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19819 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
19820 {
19821 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
19822 (int32x4_t) __b);
19823 }
19824
19825 /* vmaxnm */
19826
19827 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19828 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
19829 {
19830 return __builtin_aarch64_smaxv2sf (__a, __b);
19831 }
19832
19833 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19834 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
19835 {
19836 return __builtin_aarch64_smaxv4sf (__a, __b);
19837 }
19838
19839 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19840 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
19841 {
19842 return __builtin_aarch64_smaxv2df (__a, __b);
19843 }
19844
19845 /* vmaxv */
19846
19847 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19848 vmaxv_f32 (float32x2_t __a)
19849 {
19850 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
19851 __LANE0 (2));
19852 }
19853
19854 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19855 vmaxv_s8 (int8x8_t __a)
19856 {
19857 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), __LANE0 (8));
19858 }
19859
19860 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19861 vmaxv_s16 (int16x4_t __a)
19862 {
19863 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), __LANE0 (4));
19864 }
19865
19866 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19867 vmaxv_s32 (int32x2_t __a)
19868 {
19869 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), __LANE0 (2));
19870 }
19871
19872 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19873 vmaxv_u8 (uint8x8_t __a)
19874 {
19875 return vget_lane_u8 ((uint8x8_t)
19876 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
19877 __LANE0 (8));
19878 }
19879
19880 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19881 vmaxv_u16 (uint16x4_t __a)
19882 {
19883 return vget_lane_u16 ((uint16x4_t)
19884 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
19885 __LANE0 (4));
19886 }
19887
19888 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19889 vmaxv_u32 (uint32x2_t __a)
19890 {
19891 return vget_lane_u32 ((uint32x2_t)
19892 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
19893 __LANE0 (2));
19894 }
19895
19896 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19897 vmaxvq_f32 (float32x4_t __a)
19898 {
19899 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
19900 __LANE0 (4));
19901 }
19902
19903 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19904 vmaxvq_f64 (float64x2_t __a)
19905 {
19906 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
19907 __LANE0 (2));
19908 }
19909
19910 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19911 vmaxvq_s8 (int8x16_t __a)
19912 {
19913 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), __LANE0 (16));
19914 }
19915
19916 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19917 vmaxvq_s16 (int16x8_t __a)
19918 {
19919 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), __LANE0 (8));
19920 }
19921
19922 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19923 vmaxvq_s32 (int32x4_t __a)
19924 {
19925 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), __LANE0 (4));
19926 }
19927
19928 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19929 vmaxvq_u8 (uint8x16_t __a)
19930 {
19931 return vgetq_lane_u8 ((uint8x16_t)
19932 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
19933 __LANE0 (16));
19934 }
19935
19936 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19937 vmaxvq_u16 (uint16x8_t __a)
19938 {
19939 return vgetq_lane_u16 ((uint16x8_t)
19940 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
19941 __LANE0 (8));
19942 }
19943
19944 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19945 vmaxvq_u32 (uint32x4_t __a)
19946 {
19947 return vgetq_lane_u32 ((uint32x4_t)
19948 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
19949 __LANE0 (4));
19950 }
19951
19952 /* vmaxnmv */
19953
19954 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19955 vmaxnmv_f32 (float32x2_t __a)
19956 {
19957 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
19958 __LANE0 (2));
19959 }
19960
19961 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19962 vmaxnmvq_f32 (float32x4_t __a)
19963 {
19964 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), __LANE0 (4));
19965 }
19966
19967 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19968 vmaxnmvq_f64 (float64x2_t __a)
19969 {
19970 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), __LANE0 (2));
19971 }
19972
19973 /* vmin */
19974
19975 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19976 vmin_f32 (float32x2_t __a, float32x2_t __b)
19977 {
19978 return __builtin_aarch64_smin_nanv2sf (__a, __b);
19979 }
19980
19981 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19982 vmin_s8 (int8x8_t __a, int8x8_t __b)
19983 {
19984 return __builtin_aarch64_sminv8qi (__a, __b);
19985 }
19986
19987 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19988 vmin_s16 (int16x4_t __a, int16x4_t __b)
19989 {
19990 return __builtin_aarch64_sminv4hi (__a, __b);
19991 }
19992
19993 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19994 vmin_s32 (int32x2_t __a, int32x2_t __b)
19995 {
19996 return __builtin_aarch64_sminv2si (__a, __b);
19997 }
19998
19999 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20000 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
20001 {
20002 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
20003 (int8x8_t) __b);
20004 }
20005
20006 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20007 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
20008 {
20009 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
20010 (int16x4_t) __b);
20011 }
20012
20013 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20014 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
20015 {
20016 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
20017 (int32x2_t) __b);
20018 }
20019
20020 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20021 vminq_f32 (float32x4_t __a, float32x4_t __b)
20022 {
20023 return __builtin_aarch64_smin_nanv4sf (__a, __b);
20024 }
20025
20026 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20027 vminq_f64 (float64x2_t __a, float64x2_t __b)
20028 {
20029 return __builtin_aarch64_smin_nanv2df (__a, __b);
20030 }
20031
20032 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20033 vminq_s8 (int8x16_t __a, int8x16_t __b)
20034 {
20035 return __builtin_aarch64_sminv16qi (__a, __b);
20036 }
20037
20038 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20039 vminq_s16 (int16x8_t __a, int16x8_t __b)
20040 {
20041 return __builtin_aarch64_sminv8hi (__a, __b);
20042 }
20043
20044 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20045 vminq_s32 (int32x4_t __a, int32x4_t __b)
20046 {
20047 return __builtin_aarch64_sminv4si (__a, __b);
20048 }
20049
20050 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20051 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
20052 {
20053 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
20054 (int8x16_t) __b);
20055 }
20056
20057 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20058 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
20059 {
20060 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
20061 (int16x8_t) __b);
20062 }
20063
20064 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20065 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
20066 {
20067 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
20068 (int32x4_t) __b);
20069 }
20070
20071 /* vminnm */
20072
20073 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20074 vminnm_f32 (float32x2_t __a, float32x2_t __b)
20075 {
20076 return __builtin_aarch64_sminv2sf (__a, __b);
20077 }
20078
20079 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20080 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
20081 {
20082 return __builtin_aarch64_sminv4sf (__a, __b);
20083 }
20084
20085 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20086 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
20087 {
20088 return __builtin_aarch64_sminv2df (__a, __b);
20089 }
20090
20091 /* vminv */
20092
20093 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20094 vminv_f32 (float32x2_t __a)
20095 {
20096 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
20097 __LANE0 (2));
20098 }
20099
20100 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20101 vminv_s8 (int8x8_t __a)
20102 {
20103 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
20104 __LANE0 (8));
20105 }
20106
20107 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20108 vminv_s16 (int16x4_t __a)
20109 {
20110 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), __LANE0 (4));
20111 }
20112
20113 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20114 vminv_s32 (int32x2_t __a)
20115 {
20116 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), __LANE0 (2));
20117 }
20118
20119 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20120 vminv_u8 (uint8x8_t __a)
20121 {
20122 return vget_lane_u8 ((uint8x8_t)
20123 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
20124 __LANE0 (8));
20125 }
20126
20127 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20128 vminv_u16 (uint16x4_t __a)
20129 {
20130 return vget_lane_u16 ((uint16x4_t)
20131 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
20132 __LANE0 (4));
20133 }
20134
20135 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20136 vminv_u32 (uint32x2_t __a)
20137 {
20138 return vget_lane_u32 ((uint32x2_t)
20139 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
20140 __LANE0 (2));
20141 }
20142
20143 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20144 vminvq_f32 (float32x4_t __a)
20145 {
20146 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
20147 __LANE0 (4));
20148 }
20149
20150 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20151 vminvq_f64 (float64x2_t __a)
20152 {
20153 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
20154 __LANE0 (2));
20155 }
20156
20157 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20158 vminvq_s8 (int8x16_t __a)
20159 {
20160 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), __LANE0 (16));
20161 }
20162
20163 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20164 vminvq_s16 (int16x8_t __a)
20165 {
20166 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), __LANE0 (8));
20167 }
20168
20169 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20170 vminvq_s32 (int32x4_t __a)
20171 {
20172 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), __LANE0 (4));
20173 }
20174
20175 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20176 vminvq_u8 (uint8x16_t __a)
20177 {
20178 return vgetq_lane_u8 ((uint8x16_t)
20179 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
20180 __LANE0 (16));
20181 }
20182
20183 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20184 vminvq_u16 (uint16x8_t __a)
20185 {
20186 return vgetq_lane_u16 ((uint16x8_t)
20187 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
20188 __LANE0 (8));
20189 }
20190
20191 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20192 vminvq_u32 (uint32x4_t __a)
20193 {
20194 return vgetq_lane_u32 ((uint32x4_t)
20195 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
20196 __LANE0 (4));
20197 }
20198
20199 /* vminnmv */
20200
20201 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20202 vminnmv_f32 (float32x2_t __a)
20203 {
20204 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), __LANE0 (2));
20205 }
20206
20207 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20208 vminnmvq_f32 (float32x4_t __a)
20209 {
20210 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), __LANE0 (4));
20211 }
20212
20213 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20214 vminnmvq_f64 (float64x2_t __a)
20215 {
20216 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), __LANE0 (2));
20217 }
20218
20219 /* vmla */
20220
20221 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20222 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20223 {
20224 return a + b * c;
20225 }
20226
20227 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20228 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20229 {
20230 return a + b * c;
20231 }
20232
20233 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20234 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20235 {
20236 return a + b * c;
20237 }
20238
20239 /* vmla_lane */
20240
20241 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20242 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
20243 float32x2_t __c, const int __lane)
20244 {
20245 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20246 }
20247
20248 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20249 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
20250 int16x4_t __c, const int __lane)
20251 {
20252 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20253 }
20254
20255 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20256 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
20257 int32x2_t __c, const int __lane)
20258 {
20259 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20260 }
20261
20262 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20263 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20264 uint16x4_t __c, const int __lane)
20265 {
20266 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20267 }
20268
20269 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20270 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20271 uint32x2_t __c, const int __lane)
20272 {
20273 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20274 }
20275
20276 /* vmla_laneq */
20277
20278 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20279 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
20280 float32x4_t __c, const int __lane)
20281 {
20282 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20283 }
20284
20285 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20286 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
20287 int16x8_t __c, const int __lane)
20288 {
20289 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20290 }
20291
20292 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20293 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
20294 int32x4_t __c, const int __lane)
20295 {
20296 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20297 }
20298
20299 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20300 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20301 uint16x8_t __c, const int __lane)
20302 {
20303 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20304 }
20305
20306 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20307 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20308 uint32x4_t __c, const int __lane)
20309 {
20310 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20311 }
20312
20313 /* vmlaq_lane */
20314
20315 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20316 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
20317 float32x2_t __c, const int __lane)
20318 {
20319 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20320 }
20321
20322 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20323 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
20324 int16x4_t __c, const int __lane)
20325 {
20326 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20327 }
20328
20329 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20330 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
20331 int32x2_t __c, const int __lane)
20332 {
20333 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20334 }
20335
20336 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20337 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20338 uint16x4_t __c, const int __lane)
20339 {
20340 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20341 }
20342
20343 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20344 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20345 uint32x2_t __c, const int __lane)
20346 {
20347 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20348 }
20349
20350 /* vmlaq_laneq */
20351
20352 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20353 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20354 float32x4_t __c, const int __lane)
20355 {
20356 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20357 }
20358
20359 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20360 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20361 int16x8_t __c, const int __lane)
20362 {
20363 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20364 }
20365
20366 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20367 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20368 int32x4_t __c, const int __lane)
20369 {
20370 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20371 }
20372
20373 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20374 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20375 uint16x8_t __c, const int __lane)
20376 {
20377 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20378 }
20379
20380 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20381 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20382 uint32x4_t __c, const int __lane)
20383 {
20384 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20385 }
20386
20387 /* vmls */
20388
20389 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20390 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20391 {
20392 return a - b * c;
20393 }
20394
20395 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20396 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20397 {
20398 return a - b * c;
20399 }
20400
20401 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20402 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20403 {
20404 return a - b * c;
20405 }
20406
20407 /* vmls_lane */
20408
20409 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20410 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
20411 float32x2_t __c, const int __lane)
20412 {
20413 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20414 }
20415
20416 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20417 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
20418 int16x4_t __c, const int __lane)
20419 {
20420 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20421 }
20422
20423 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20424 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
20425 int32x2_t __c, const int __lane)
20426 {
20427 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20428 }
20429
20430 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20431 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20432 uint16x4_t __c, const int __lane)
20433 {
20434 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20435 }
20436
20437 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20438 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20439 uint32x2_t __c, const int __lane)
20440 {
20441 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20442 }
20443
20444 /* vmls_laneq */
20445
20446 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20447 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
20448 float32x4_t __c, const int __lane)
20449 {
20450 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20451 }
20452
20453 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20454 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
20455 int16x8_t __c, const int __lane)
20456 {
20457 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20458 }
20459
20460 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20461 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
20462 int32x4_t __c, const int __lane)
20463 {
20464 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20465 }
20466
20467 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20468 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20469 uint16x8_t __c, const int __lane)
20470 {
20471 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20472 }
20473
20474 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20475 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20476 uint32x4_t __c, const int __lane)
20477 {
20478 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20479 }
20480
20481 /* vmlsq_lane */
20482
20483 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20484 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
20485 float32x2_t __c, const int __lane)
20486 {
20487 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20488 }
20489
20490 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20491 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
20492 int16x4_t __c, const int __lane)
20493 {
20494 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20495 }
20496
20497 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20498 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
20499 int32x2_t __c, const int __lane)
20500 {
20501 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20502 }
20503
20504 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20505 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20506 uint16x4_t __c, const int __lane)
20507 {
20508 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20509 }
20510
20511 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20512 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20513 uint32x2_t __c, const int __lane)
20514 {
20515 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20516 }
20517
20518 /* vmlsq_laneq */
20519
20520 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20521 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20522 float32x4_t __c, const int __lane)
20523 {
20524 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20525 }
20526
20527 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20528 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20529 int16x8_t __c, const int __lane)
20530 {
20531 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20532 }
20533
20534 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20535 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20536 int32x4_t __c, const int __lane)
20537 {
20538 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20539 }
20540 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20541 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20542 uint16x8_t __c, const int __lane)
20543 {
20544 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20545 }
20546
20547 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20548 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20549 uint32x4_t __c, const int __lane)
20550 {
20551 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20552 }
20553
20554 /* vmov_n_ */
20555
20556 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20557 vmov_n_f32 (float32_t __a)
20558 {
20559 return vdup_n_f32 (__a);
20560 }
20561
20562 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20563 vmov_n_f64 (float64_t __a)
20564 {
20565 return __a;
20566 }
20567
20568 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20569 vmov_n_p8 (poly8_t __a)
20570 {
20571 return vdup_n_p8 (__a);
20572 }
20573
20574 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20575 vmov_n_p16 (poly16_t __a)
20576 {
20577 return vdup_n_p16 (__a);
20578 }
20579
20580 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20581 vmov_n_s8 (int8_t __a)
20582 {
20583 return vdup_n_s8 (__a);
20584 }
20585
20586 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20587 vmov_n_s16 (int16_t __a)
20588 {
20589 return vdup_n_s16 (__a);
20590 }
20591
20592 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20593 vmov_n_s32 (int32_t __a)
20594 {
20595 return vdup_n_s32 (__a);
20596 }
20597
20598 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20599 vmov_n_s64 (int64_t __a)
20600 {
20601 return __a;
20602 }
20603
20604 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20605 vmov_n_u8 (uint8_t __a)
20606 {
20607 return vdup_n_u8 (__a);
20608 }
20609
20610 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20611 vmov_n_u16 (uint16_t __a)
20612 {
20613 return vdup_n_u16 (__a);
20614 }
20615
20616 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20617 vmov_n_u32 (uint32_t __a)
20618 {
20619 return vdup_n_u32 (__a);
20620 }
20621
20622 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20623 vmov_n_u64 (uint64_t __a)
20624 {
20625 return __a;
20626 }
20627
20628 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20629 vmovq_n_f32 (float32_t __a)
20630 {
20631 return vdupq_n_f32 (__a);
20632 }
20633
20634 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20635 vmovq_n_f64 (float64_t __a)
20636 {
20637 return vdupq_n_f64 (__a);
20638 }
20639
20640 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20641 vmovq_n_p8 (poly8_t __a)
20642 {
20643 return vdupq_n_p8 (__a);
20644 }
20645
20646 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20647 vmovq_n_p16 (poly16_t __a)
20648 {
20649 return vdupq_n_p16 (__a);
20650 }
20651
20652 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20653 vmovq_n_s8 (int8_t __a)
20654 {
20655 return vdupq_n_s8 (__a);
20656 }
20657
20658 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20659 vmovq_n_s16 (int16_t __a)
20660 {
20661 return vdupq_n_s16 (__a);
20662 }
20663
20664 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20665 vmovq_n_s32 (int32_t __a)
20666 {
20667 return vdupq_n_s32 (__a);
20668 }
20669
20670 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20671 vmovq_n_s64 (int64_t __a)
20672 {
20673 return vdupq_n_s64 (__a);
20674 }
20675
20676 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20677 vmovq_n_u8 (uint8_t __a)
20678 {
20679 return vdupq_n_u8 (__a);
20680 }
20681
20682 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20683 vmovq_n_u16 (uint16_t __a)
20684 {
20685 return vdupq_n_u16 (__a);
20686 }
20687
20688 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20689 vmovq_n_u32 (uint32_t __a)
20690 {
20691 return vdupq_n_u32 (__a);
20692 }
20693
20694 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20695 vmovq_n_u64 (uint64_t __a)
20696 {
20697 return vdupq_n_u64 (__a);
20698 }
20699
20700 /* vmul_lane */
20701
20702 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20703 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
20704 {
20705 return __a * __aarch64_vget_lane_f32 (__b, __lane);
20706 }
20707
20708 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20709 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
20710 {
20711 return __a * __b;
20712 }
20713
20714 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20715 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
20716 {
20717 return __a * __aarch64_vget_lane_s16 (__b, __lane);
20718 }
20719
20720 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20721 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
20722 {
20723 return __a * __aarch64_vget_lane_s32 (__b, __lane);
20724 }
20725
20726 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20727 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
20728 {
20729 return __a * __aarch64_vget_lane_u16 (__b, __lane);
20730 }
20731
20732 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20733 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
20734 {
20735 return __a * __aarch64_vget_lane_u32 (__b, __lane);
20736 }
20737
20738 /* vmul_laneq */
20739
20740 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20741 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
20742 {
20743 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
20744 }
20745
20746 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20747 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
20748 {
20749 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
20750 }
20751
20752 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20753 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
20754 {
20755 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
20756 }
20757
20758 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20759 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
20760 {
20761 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
20762 }
20763
20764 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20765 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
20766 {
20767 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
20768 }
20769
20770 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20771 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
20772 {
20773 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
20774 }
20775
20776 /* vmulq_lane */
20777
20778 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20779 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
20780 {
20781 return __a * __aarch64_vget_lane_f32 (__b, __lane);
20782 }
20783
20784 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20785 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
20786 {
20787 return __a * __b;
20788 }
20789
20790 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20791 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
20792 {
20793 return __a * __aarch64_vget_lane_s16 (__b, __lane);
20794 }
20795
20796 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20797 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
20798 {
20799 return __a * __aarch64_vget_lane_s32 (__b, __lane);
20800 }
20801
20802 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20803 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
20804 {
20805 return __a * __aarch64_vget_lane_u16 (__b, __lane);
20806 }
20807
20808 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20809 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
20810 {
20811 return __a * __aarch64_vget_lane_u32 (__b, __lane);
20812 }
20813
20814 /* vmulq_laneq */
20815
20816 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20817 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
20818 {
20819 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
20820 }
20821
20822 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20823 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
20824 {
20825 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
20826 }
20827
20828 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20829 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
20830 {
20831 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
20832 }
20833
20834 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20835 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
20836 {
20837 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
20838 }
20839
20840 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20841 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
20842 {
20843 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
20844 }
20845
20846 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20847 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
20848 {
20849 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
20850 }
20851
20852 /* vneg */
20853
20854 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20855 vneg_f32 (float32x2_t __a)
20856 {
20857 return -__a;
20858 }
20859
20860 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20861 vneg_f64 (float64x1_t __a)
20862 {
20863 return -__a;
20864 }
20865
20866 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20867 vneg_s8 (int8x8_t __a)
20868 {
20869 return -__a;
20870 }
20871
20872 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20873 vneg_s16 (int16x4_t __a)
20874 {
20875 return -__a;
20876 }
20877
20878 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20879 vneg_s32 (int32x2_t __a)
20880 {
20881 return -__a;
20882 }
20883
20884 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20885 vneg_s64 (int64x1_t __a)
20886 {
20887 return -__a;
20888 }
20889
20890 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20891 vnegq_f32 (float32x4_t __a)
20892 {
20893 return -__a;
20894 }
20895
20896 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20897 vnegq_f64 (float64x2_t __a)
20898 {
20899 return -__a;
20900 }
20901
20902 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20903 vnegq_s8 (int8x16_t __a)
20904 {
20905 return -__a;
20906 }
20907
20908 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20909 vnegq_s16 (int16x8_t __a)
20910 {
20911 return -__a;
20912 }
20913
20914 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20915 vnegq_s32 (int32x4_t __a)
20916 {
20917 return -__a;
20918 }
20919
20920 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20921 vnegq_s64 (int64x2_t __a)
20922 {
20923 return -__a;
20924 }
20925
20926 /* vqabs */
20927
20928 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20929 vqabsq_s64 (int64x2_t __a)
20930 {
20931 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
20932 }
20933
20934 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20935 vqabsb_s8 (int8x1_t __a)
20936 {
20937 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
20938 }
20939
20940 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20941 vqabsh_s16 (int16x1_t __a)
20942 {
20943 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
20944 }
20945
20946 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20947 vqabss_s32 (int32x1_t __a)
20948 {
20949 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
20950 }
20951
20952 /* vqadd */
20953
20954 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20955 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
20956 {
20957 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
20958 }
20959
20960 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20961 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
20962 {
20963 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
20964 }
20965
20966 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20967 vqadds_s32 (int32x1_t __a, int32x1_t __b)
20968 {
20969 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
20970 }
20971
20972 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20973 vqaddd_s64 (int64x1_t __a, int64x1_t __b)
20974 {
20975 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
20976 }
20977
20978 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20979 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
20980 {
20981 return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
20982 }
20983
20984 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20985 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
20986 {
20987 return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
20988 }
20989
20990 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20991 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
20992 {
20993 return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
20994 }
20995
20996 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20997 vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
20998 {
20999 return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
21000 }
21001
21002 /* vqdmlal */
21003
21004 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21005 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21006 {
21007 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
21008 }
21009
21010 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21011 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21012 {
21013 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
21014 }
21015
21016 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21017 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21018 int const __d)
21019 {
21020 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
21021 }
21022
21023 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21024 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21025 int const __d)
21026 {
21027 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
21028 }
21029
21030 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21031 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21032 {
21033 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
21034 }
21035
21036 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21037 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21038 {
21039 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
21040 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
21041 }
21042
21043 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21044 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21045 {
21046 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
21047 }
21048
21049 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21050 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21051 {
21052 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
21053 }
21054
21055 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21056 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21057 {
21058 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
21059 }
21060
21061 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21062 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21063 {
21064 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
21065 }
21066
21067 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21068 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21069 int const __d)
21070 {
21071 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
21072 }
21073
21074 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21075 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21076 int const __d)
21077 {
21078 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
21079 }
21080
21081 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21082 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21083 {
21084 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
21085 }
21086
21087 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21088 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21089 {
21090 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
21091 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
21092 }
21093
21094 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21095 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21096 {
21097 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
21098 }
21099
21100 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21101 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21102 {
21103 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
21104 }
21105
21106 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21107 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21108 {
21109 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
21110 }
21111
21112 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21113 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21114 {
21115 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
21116 }
21117
21118 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21119 vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21120 {
21121 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
21122 }
21123
21124 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21125 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21126 {
21127 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
21128 }
21129
21130 /* vqdmlsl */
21131
21132 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21133 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21134 {
21135 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
21136 }
21137
21138 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21139 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21140 {
21141 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
21142 }
21143
21144 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21145 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21146 int const __d)
21147 {
21148 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
21149 }
21150
21151 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21152 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21153 int const __d)
21154 {
21155 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
21156 }
21157
21158 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21159 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21160 {
21161 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
21162 }
21163
21164 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21165 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21166 {
21167 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
21168 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
21169 }
21170
21171 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21172 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21173 {
21174 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
21175 }
21176
21177 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21178 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21179 {
21180 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
21181 }
21182
21183 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21184 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21185 {
21186 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
21187 }
21188
21189 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21190 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21191 {
21192 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
21193 }
21194
21195 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21196 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21197 int const __d)
21198 {
21199 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
21200 }
21201
21202 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21203 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21204 int const __d)
21205 {
21206 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
21207 }
21208
21209 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21210 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21211 {
21212 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
21213 }
21214
21215 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21216 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21217 {
21218 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
21219 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
21220 }
21221
21222 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21223 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21224 {
21225 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
21226 }
21227
21228 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21229 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21230 {
21231 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
21232 }
21233
21234 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21235 vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21236 {
21237 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
21238 }
21239
21240 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21241 vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21242 {
21243 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
21244 }
21245
21246 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21247 vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21248 {
21249 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
21250 }
21251
21252 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21253 vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21254 {
21255 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
21256 }
21257
21258 /* vqdmulh */
21259
21260 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21261 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21262 {
21263 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
21264 }
21265
21266 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21267 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21268 {
21269 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
21270 }
21271
21272 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21273 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21274 {
21275 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
21276 }
21277
21278 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21279 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21280 {
21281 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
21282 }
21283
21284 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21285 vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21286 {
21287 return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
21288 }
21289
21290 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21291 vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21292 {
21293 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
21294 }
21295
21296 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21297 vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21298 {
21299 return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
21300 }
21301
21302 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21303 vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21304 {
21305 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
21306 }
21307
21308 /* vqdmull */
21309
21310 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21311 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
21312 {
21313 return __builtin_aarch64_sqdmullv4hi (__a, __b);
21314 }
21315
21316 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21317 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
21318 {
21319 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
21320 }
21321
21322 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21323 vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21324 {
21325 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
21326 }
21327
21328 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21329 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21330 {
21331 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
21332 }
21333
21334 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21335 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
21336 {
21337 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
21338 }
21339
21340 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21341 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
21342 {
21343 int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0)));
21344 return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
21345 }
21346
21347 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21348 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
21349 {
21350 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
21351 }
21352
21353 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21354 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
21355 {
21356 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
21357 }
21358
21359 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21360 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
21361 {
21362 return __builtin_aarch64_sqdmullv2si (__a, __b);
21363 }
21364
21365 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21366 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
21367 {
21368 return __builtin_aarch64_sqdmull2v4si (__a, __b);
21369 }
21370
21371 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21372 vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21373 {
21374 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
21375 }
21376
21377 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21378 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21379 {
21380 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
21381 }
21382
21383 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21384 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
21385 {
21386 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
21387 }
21388
21389 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21390 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
21391 {
21392 int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0)));
21393 return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
21394 }
21395
21396 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21397 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
21398 {
21399 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
21400 }
21401
21402 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21403 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
21404 {
21405 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
21406 }
21407
21408 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21409 vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
21410 {
21411 return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
21412 }
21413
21414 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21415 vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21416 {
21417 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
21418 }
21419
21420 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21421 vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
21422 {
21423 return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
21424 }
21425
21426 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21427 vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21428 {
21429 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
21430 }
21431
21432 /* vqmovn */
21433
21434 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21435 vqmovn_s16 (int16x8_t __a)
21436 {
21437 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
21438 }
21439
21440 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21441 vqmovn_s32 (int32x4_t __a)
21442 {
21443 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
21444 }
21445
21446 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21447 vqmovn_s64 (int64x2_t __a)
21448 {
21449 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
21450 }
21451
21452 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21453 vqmovn_u16 (uint16x8_t __a)
21454 {
21455 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
21456 }
21457
21458 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21459 vqmovn_u32 (uint32x4_t __a)
21460 {
21461 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
21462 }
21463
21464 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21465 vqmovn_u64 (uint64x2_t __a)
21466 {
21467 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
21468 }
21469
21470 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21471 vqmovnh_s16 (int16x1_t __a)
21472 {
21473 return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
21474 }
21475
21476 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21477 vqmovns_s32 (int32x1_t __a)
21478 {
21479 return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
21480 }
21481
21482 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21483 vqmovnd_s64 (int64x1_t __a)
21484 {
21485 return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
21486 }
21487
21488 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21489 vqmovnh_u16 (uint16x1_t __a)
21490 {
21491 return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
21492 }
21493
21494 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21495 vqmovns_u32 (uint32x1_t __a)
21496 {
21497 return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
21498 }
21499
21500 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21501 vqmovnd_u64 (uint64x1_t __a)
21502 {
21503 return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
21504 }
21505
21506 /* vqmovun */
21507
21508 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21509 vqmovun_s16 (int16x8_t __a)
21510 {
21511 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
21512 }
21513
21514 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21515 vqmovun_s32 (int32x4_t __a)
21516 {
21517 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
21518 }
21519
21520 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21521 vqmovun_s64 (int64x2_t __a)
21522 {
21523 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
21524 }
21525
21526 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21527 vqmovunh_s16 (int16x1_t __a)
21528 {
21529 return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
21530 }
21531
21532 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21533 vqmovuns_s32 (int32x1_t __a)
21534 {
21535 return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
21536 }
21537
21538 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21539 vqmovund_s64 (int64x1_t __a)
21540 {
21541 return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
21542 }
21543
21544 /* vqneg */
21545
21546 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21547 vqnegq_s64 (int64x2_t __a)
21548 {
21549 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
21550 }
21551
21552 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21553 vqnegb_s8 (int8x1_t __a)
21554 {
21555 return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
21556 }
21557
21558 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21559 vqnegh_s16 (int16x1_t __a)
21560 {
21561 return (int16x1_t) __builtin_aarch64_sqneghi (__a);
21562 }
21563
21564 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21565 vqnegs_s32 (int32x1_t __a)
21566 {
21567 return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
21568 }
21569
21570 /* vqrdmulh */
21571
21572 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21573 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21574 {
21575 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
21576 }
21577
21578 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21579 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21580 {
21581 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
21582 }
21583
21584 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21585 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21586 {
21587 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
21588 }
21589
21590 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21591 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21592 {
21593 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
21594 }
21595
21596 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21597 vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21598 {
21599 return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
21600 }
21601
21602 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21603 vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21604 {
21605 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
21606 }
21607
21608 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21609 vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21610 {
21611 return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
21612 }
21613
21614 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21615 vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21616 {
21617 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
21618 }
21619
21620 /* vqrshl */
21621
21622 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21623 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
21624 {
21625 return __builtin_aarch64_sqrshlv8qi (__a, __b);
21626 }
21627
21628 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21629 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
21630 {
21631 return __builtin_aarch64_sqrshlv4hi (__a, __b);
21632 }
21633
21634 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21635 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
21636 {
21637 return __builtin_aarch64_sqrshlv2si (__a, __b);
21638 }
21639
21640 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21641 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
21642 {
21643 return __builtin_aarch64_sqrshldi (__a, __b);
21644 }
21645
21646 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21647 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
21648 {
21649 return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
21650 }
21651
21652 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21653 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
21654 {
21655 return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
21656 }
21657
21658 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21659 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
21660 {
21661 return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
21662 }
21663
21664 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21665 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
21666 {
21667 return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
21668 }
21669
21670 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21671 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
21672 {
21673 return __builtin_aarch64_sqrshlv16qi (__a, __b);
21674 }
21675
21676 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21677 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
21678 {
21679 return __builtin_aarch64_sqrshlv8hi (__a, __b);
21680 }
21681
21682 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21683 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
21684 {
21685 return __builtin_aarch64_sqrshlv4si (__a, __b);
21686 }
21687
21688 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21689 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
21690 {
21691 return __builtin_aarch64_sqrshlv2di (__a, __b);
21692 }
21693
21694 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21695 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
21696 {
21697 return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
21698 }
21699
21700 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21701 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
21702 {
21703 return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
21704 }
21705
21706 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21707 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
21708 {
21709 return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
21710 }
21711
21712 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21713 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
21714 {
21715 return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
21716 }
21717
21718 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21719 vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
21720 {
21721 return __builtin_aarch64_sqrshlqi (__a, __b);
21722 }
21723
21724 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21725 vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
21726 {
21727 return __builtin_aarch64_sqrshlhi (__a, __b);
21728 }
21729
21730 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21731 vqrshls_s32 (int32x1_t __a, int32x1_t __b)
21732 {
21733 return __builtin_aarch64_sqrshlsi (__a, __b);
21734 }
21735
21736 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21737 vqrshld_s64 (int64x1_t __a, int64x1_t __b)
21738 {
21739 return __builtin_aarch64_sqrshldi (__a, __b);
21740 }
21741
21742 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21743 vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
21744 {
21745 return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
21746 }
21747
21748 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21749 vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
21750 {
21751 return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
21752 }
21753
21754 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21755 vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
21756 {
21757 return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
21758 }
21759
21760 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21761 vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
21762 {
21763 return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
21764 }
21765
21766 /* vqrshrn */
21767
21768 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21769 vqrshrn_n_s16 (int16x8_t __a, const int __b)
21770 {
21771 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
21772 }
21773
21774 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21775 vqrshrn_n_s32 (int32x4_t __a, const int __b)
21776 {
21777 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
21778 }
21779
21780 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21781 vqrshrn_n_s64 (int64x2_t __a, const int __b)
21782 {
21783 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
21784 }
21785
21786 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21787 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
21788 {
21789 return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
21790 }
21791
21792 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21793 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
21794 {
21795 return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
21796 }
21797
21798 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21799 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
21800 {
21801 return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
21802 }
21803
21804 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21805 vqrshrnh_n_s16 (int16x1_t __a, const int __b)
21806 {
21807 return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
21808 }
21809
21810 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21811 vqrshrns_n_s32 (int32x1_t __a, const int __b)
21812 {
21813 return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
21814 }
21815
21816 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21817 vqrshrnd_n_s64 (int64x1_t __a, const int __b)
21818 {
21819 return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
21820 }
21821
21822 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21823 vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
21824 {
21825 return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
21826 }
21827
21828 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21829 vqrshrns_n_u32 (uint32x1_t __a, const int __b)
21830 {
21831 return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
21832 }
21833
21834 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21835 vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
21836 {
21837 return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
21838 }
21839
21840 /* vqrshrun */
21841
21842 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21843 vqrshrun_n_s16 (int16x8_t __a, const int __b)
21844 {
21845 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
21846 }
21847
21848 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21849 vqrshrun_n_s32 (int32x4_t __a, const int __b)
21850 {
21851 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
21852 }
21853
21854 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21855 vqrshrun_n_s64 (int64x2_t __a, const int __b)
21856 {
21857 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
21858 }
21859
21860 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21861 vqrshrunh_n_s16 (int16x1_t __a, const int __b)
21862 {
21863 return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
21864 }
21865
21866 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21867 vqrshruns_n_s32 (int32x1_t __a, const int __b)
21868 {
21869 return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
21870 }
21871
21872 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21873 vqrshrund_n_s64 (int64x1_t __a, const int __b)
21874 {
21875 return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
21876 }
21877
21878 /* vqshl */
21879
21880 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21881 vqshl_s8 (int8x8_t __a, int8x8_t __b)
21882 {
21883 return __builtin_aarch64_sqshlv8qi (__a, __b);
21884 }
21885
21886 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21887 vqshl_s16 (int16x4_t __a, int16x4_t __b)
21888 {
21889 return __builtin_aarch64_sqshlv4hi (__a, __b);
21890 }
21891
21892 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21893 vqshl_s32 (int32x2_t __a, int32x2_t __b)
21894 {
21895 return __builtin_aarch64_sqshlv2si (__a, __b);
21896 }
21897
21898 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21899 vqshl_s64 (int64x1_t __a, int64x1_t __b)
21900 {
21901 return __builtin_aarch64_sqshldi (__a, __b);
21902 }
21903
21904 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21905 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
21906 {
21907 return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
21908 }
21909
21910 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21911 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
21912 {
21913 return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
21914 }
21915
21916 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21917 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
21918 {
21919 return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
21920 }
21921
21922 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21923 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
21924 {
21925 return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
21926 }
21927
21928 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21929 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
21930 {
21931 return __builtin_aarch64_sqshlv16qi (__a, __b);
21932 }
21933
21934 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21935 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
21936 {
21937 return __builtin_aarch64_sqshlv8hi (__a, __b);
21938 }
21939
21940 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21941 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
21942 {
21943 return __builtin_aarch64_sqshlv4si (__a, __b);
21944 }
21945
21946 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21947 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
21948 {
21949 return __builtin_aarch64_sqshlv2di (__a, __b);
21950 }
21951
21952 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21953 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
21954 {
21955 return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
21956 }
21957
21958 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21959 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
21960 {
21961 return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
21962 }
21963
21964 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21965 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
21966 {
21967 return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
21968 }
21969
21970 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21971 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
21972 {
21973 return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
21974 }
21975
21976 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21977 vqshlb_s8 (int8x1_t __a, int8x1_t __b)
21978 {
21979 return __builtin_aarch64_sqshlqi (__a, __b);
21980 }
21981
21982 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21983 vqshlh_s16 (int16x1_t __a, int16x1_t __b)
21984 {
21985 return __builtin_aarch64_sqshlhi (__a, __b);
21986 }
21987
21988 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21989 vqshls_s32 (int32x1_t __a, int32x1_t __b)
21990 {
21991 return __builtin_aarch64_sqshlsi (__a, __b);
21992 }
21993
21994 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21995 vqshld_s64 (int64x1_t __a, int64x1_t __b)
21996 {
21997 return __builtin_aarch64_sqshldi (__a, __b);
21998 }
21999
22000 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22001 vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
22002 {
22003 return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
22004 }
22005
22006 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22007 vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22008 {
22009 return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
22010 }
22011
22012 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22013 vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
22014 {
22015 return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
22016 }
22017
22018 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22019 vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
22020 {
22021 return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
22022 }
22023
22024 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22025 vqshl_n_s8 (int8x8_t __a, const int __b)
22026 {
22027 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
22028 }
22029
22030 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22031 vqshl_n_s16 (int16x4_t __a, const int __b)
22032 {
22033 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
22034 }
22035
22036 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22037 vqshl_n_s32 (int32x2_t __a, const int __b)
22038 {
22039 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
22040 }
22041
22042 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22043 vqshl_n_s64 (int64x1_t __a, const int __b)
22044 {
22045 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22046 }
22047
22048 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22049 vqshl_n_u8 (uint8x8_t __a, const int __b)
22050 {
22051 return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
22052 }
22053
22054 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22055 vqshl_n_u16 (uint16x4_t __a, const int __b)
22056 {
22057 return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
22058 }
22059
22060 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22061 vqshl_n_u32 (uint32x2_t __a, const int __b)
22062 {
22063 return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
22064 }
22065
22066 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22067 vqshl_n_u64 (uint64x1_t __a, const int __b)
22068 {
22069 return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
22070 }
22071
22072 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22073 vqshlq_n_s8 (int8x16_t __a, const int __b)
22074 {
22075 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
22076 }
22077
22078 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22079 vqshlq_n_s16 (int16x8_t __a, const int __b)
22080 {
22081 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
22082 }
22083
22084 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22085 vqshlq_n_s32 (int32x4_t __a, const int __b)
22086 {
22087 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
22088 }
22089
22090 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22091 vqshlq_n_s64 (int64x2_t __a, const int __b)
22092 {
22093 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
22094 }
22095
22096 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22097 vqshlq_n_u8 (uint8x16_t __a, const int __b)
22098 {
22099 return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
22100 }
22101
22102 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22103 vqshlq_n_u16 (uint16x8_t __a, const int __b)
22104 {
22105 return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
22106 }
22107
22108 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22109 vqshlq_n_u32 (uint32x4_t __a, const int __b)
22110 {
22111 return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
22112 }
22113
22114 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22115 vqshlq_n_u64 (uint64x2_t __a, const int __b)
22116 {
22117 return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
22118 }
22119
22120 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22121 vqshlb_n_s8 (int8x1_t __a, const int __b)
22122 {
22123 return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
22124 }
22125
22126 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22127 vqshlh_n_s16 (int16x1_t __a, const int __b)
22128 {
22129 return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
22130 }
22131
22132 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22133 vqshls_n_s32 (int32x1_t __a, const int __b)
22134 {
22135 return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
22136 }
22137
22138 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22139 vqshld_n_s64 (int64x1_t __a, const int __b)
22140 {
22141 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22142 }
22143
22144 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22145 vqshlb_n_u8 (uint8x1_t __a, const int __b)
22146 {
22147 return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
22148 }
22149
22150 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22151 vqshlh_n_u16 (uint16x1_t __a, const int __b)
22152 {
22153 return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
22154 }
22155
22156 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22157 vqshls_n_u32 (uint32x1_t __a, const int __b)
22158 {
22159 return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
22160 }
22161
22162 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22163 vqshld_n_u64 (uint64x1_t __a, const int __b)
22164 {
22165 return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
22166 }
22167
22168 /* vqshlu */
22169
22170 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22171 vqshlu_n_s8 (int8x8_t __a, const int __b)
22172 {
22173 return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
22174 }
22175
22176 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22177 vqshlu_n_s16 (int16x4_t __a, const int __b)
22178 {
22179 return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
22180 }
22181
22182 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22183 vqshlu_n_s32 (int32x2_t __a, const int __b)
22184 {
22185 return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
22186 }
22187
22188 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22189 vqshlu_n_s64 (int64x1_t __a, const int __b)
22190 {
22191 return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22192 }
22193
22194 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22195 vqshluq_n_s8 (int8x16_t __a, const int __b)
22196 {
22197 return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
22198 }
22199
22200 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22201 vqshluq_n_s16 (int16x8_t __a, const int __b)
22202 {
22203 return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
22204 }
22205
22206 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22207 vqshluq_n_s32 (int32x4_t __a, const int __b)
22208 {
22209 return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
22210 }
22211
22212 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22213 vqshluq_n_s64 (int64x2_t __a, const int __b)
22214 {
22215 return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
22216 }
22217
22218 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22219 vqshlub_n_s8 (int8x1_t __a, const int __b)
22220 {
22221 return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
22222 }
22223
22224 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22225 vqshluh_n_s16 (int16x1_t __a, const int __b)
22226 {
22227 return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
22228 }
22229
22230 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22231 vqshlus_n_s32 (int32x1_t __a, const int __b)
22232 {
22233 return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
22234 }
22235
22236 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22237 vqshlud_n_s64 (int64x1_t __a, const int __b)
22238 {
22239 return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22240 }
22241
22242 /* vqshrn */
22243
22244 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22245 vqshrn_n_s16 (int16x8_t __a, const int __b)
22246 {
22247 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
22248 }
22249
22250 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22251 vqshrn_n_s32 (int32x4_t __a, const int __b)
22252 {
22253 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
22254 }
22255
22256 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22257 vqshrn_n_s64 (int64x2_t __a, const int __b)
22258 {
22259 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
22260 }
22261
22262 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22263 vqshrn_n_u16 (uint16x8_t __a, const int __b)
22264 {
22265 return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
22266 }
22267
22268 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22269 vqshrn_n_u32 (uint32x4_t __a, const int __b)
22270 {
22271 return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
22272 }
22273
22274 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22275 vqshrn_n_u64 (uint64x2_t __a, const int __b)
22276 {
22277 return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
22278 }
22279
22280 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22281 vqshrnh_n_s16 (int16x1_t __a, const int __b)
22282 {
22283 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
22284 }
22285
22286 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22287 vqshrns_n_s32 (int32x1_t __a, const int __b)
22288 {
22289 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
22290 }
22291
22292 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22293 vqshrnd_n_s64 (int64x1_t __a, const int __b)
22294 {
22295 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
22296 }
22297
22298 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22299 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
22300 {
22301 return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
22302 }
22303
22304 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22305 vqshrns_n_u32 (uint32x1_t __a, const int __b)
22306 {
22307 return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
22308 }
22309
22310 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22311 vqshrnd_n_u64 (uint64x1_t __a, const int __b)
22312 {
22313 return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
22314 }
22315
22316 /* vqshrun */
22317
22318 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22319 vqshrun_n_s16 (int16x8_t __a, const int __b)
22320 {
22321 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
22322 }
22323
22324 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22325 vqshrun_n_s32 (int32x4_t __a, const int __b)
22326 {
22327 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
22328 }
22329
22330 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22331 vqshrun_n_s64 (int64x2_t __a, const int __b)
22332 {
22333 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
22334 }
22335
22336 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22337 vqshrunh_n_s16 (int16x1_t __a, const int __b)
22338 {
22339 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
22340 }
22341
22342 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22343 vqshruns_n_s32 (int32x1_t __a, const int __b)
22344 {
22345 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
22346 }
22347
22348 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22349 vqshrund_n_s64 (int64x1_t __a, const int __b)
22350 {
22351 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
22352 }
22353
22354 /* vqsub */
22355
22356 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22357 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
22358 {
22359 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
22360 }
22361
22362 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22363 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
22364 {
22365 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
22366 }
22367
22368 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22369 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
22370 {
22371 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
22372 }
22373
22374 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22375 vqsubd_s64 (int64x1_t __a, int64x1_t __b)
22376 {
22377 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
22378 }
22379
22380 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22381 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
22382 {
22383 return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
22384 }
22385
22386 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22387 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
22388 {
22389 return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
22390 }
22391
22392 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22393 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
22394 {
22395 return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
22396 }
22397
22398 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22399 vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
22400 {
22401 return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
22402 }
22403
22404 /* vrecpe */
22405
22406 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22407 vrecpes_f32 (float32_t __a)
22408 {
22409 return __builtin_aarch64_frecpesf (__a);
22410 }
22411
22412 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22413 vrecped_f64 (float64_t __a)
22414 {
22415 return __builtin_aarch64_frecpedf (__a);
22416 }
22417
22418 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22419 vrecpe_f32 (float32x2_t __a)
22420 {
22421 return __builtin_aarch64_frecpev2sf (__a);
22422 }
22423
22424 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22425 vrecpeq_f32 (float32x4_t __a)
22426 {
22427 return __builtin_aarch64_frecpev4sf (__a);
22428 }
22429
22430 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22431 vrecpeq_f64 (float64x2_t __a)
22432 {
22433 return __builtin_aarch64_frecpev2df (__a);
22434 }
22435
22436 /* vrecps */
22437
22438 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22439 vrecpss_f32 (float32_t __a, float32_t __b)
22440 {
22441 return __builtin_aarch64_frecpssf (__a, __b);
22442 }
22443
22444 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22445 vrecpsd_f64 (float64_t __a, float64_t __b)
22446 {
22447 return __builtin_aarch64_frecpsdf (__a, __b);
22448 }
22449
22450 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22451 vrecps_f32 (float32x2_t __a, float32x2_t __b)
22452 {
22453 return __builtin_aarch64_frecpsv2sf (__a, __b);
22454 }
22455
22456 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22457 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
22458 {
22459 return __builtin_aarch64_frecpsv4sf (__a, __b);
22460 }
22461
22462 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22463 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
22464 {
22465 return __builtin_aarch64_frecpsv2df (__a, __b);
22466 }
22467
22468 /* vrecpx */
22469
22470 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22471 vrecpxs_f32 (float32_t __a)
22472 {
22473 return __builtin_aarch64_frecpxsf (__a);
22474 }
22475
22476 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22477 vrecpxd_f64 (float64_t __a)
22478 {
22479 return __builtin_aarch64_frecpxdf (__a);
22480 }
22481
22482 /* vrnd */
22483
22484 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22485 vrnd_f32 (float32x2_t __a)
22486 {
22487 return __builtin_aarch64_btruncv2sf (__a);
22488 }
22489
22490 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22491 vrndq_f32 (float32x4_t __a)
22492 {
22493 return __builtin_aarch64_btruncv4sf (__a);
22494 }
22495
22496 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22497 vrndq_f64 (float64x2_t __a)
22498 {
22499 return __builtin_aarch64_btruncv2df (__a);
22500 }
22501
22502 /* vrnda */
22503
22504 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22505 vrnda_f32 (float32x2_t __a)
22506 {
22507 return __builtin_aarch64_roundv2sf (__a);
22508 }
22509
22510 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22511 vrndaq_f32 (float32x4_t __a)
22512 {
22513 return __builtin_aarch64_roundv4sf (__a);
22514 }
22515
22516 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22517 vrndaq_f64 (float64x2_t __a)
22518 {
22519 return __builtin_aarch64_roundv2df (__a);
22520 }
22521
22522 /* vrndi */
22523
22524 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22525 vrndi_f32 (float32x2_t __a)
22526 {
22527 return __builtin_aarch64_nearbyintv2sf (__a);
22528 }
22529
22530 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22531 vrndiq_f32 (float32x4_t __a)
22532 {
22533 return __builtin_aarch64_nearbyintv4sf (__a);
22534 }
22535
22536 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22537 vrndiq_f64 (float64x2_t __a)
22538 {
22539 return __builtin_aarch64_nearbyintv2df (__a);
22540 }
22541
22542 /* vrndm */
22543
22544 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22545 vrndm_f32 (float32x2_t __a)
22546 {
22547 return __builtin_aarch64_floorv2sf (__a);
22548 }
22549
22550 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22551 vrndmq_f32 (float32x4_t __a)
22552 {
22553 return __builtin_aarch64_floorv4sf (__a);
22554 }
22555
22556 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22557 vrndmq_f64 (float64x2_t __a)
22558 {
22559 return __builtin_aarch64_floorv2df (__a);
22560 }
22561
22562 /* vrndn */
22563
22564 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22565 vrndn_f32 (float32x2_t __a)
22566 {
22567 return __builtin_aarch64_frintnv2sf (__a);
22568 }
22569 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22570 vrndnq_f32 (float32x4_t __a)
22571 {
22572 return __builtin_aarch64_frintnv4sf (__a);
22573 }
22574
22575 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22576 vrndnq_f64 (float64x2_t __a)
22577 {
22578 return __builtin_aarch64_frintnv2df (__a);
22579 }
22580
22581 /* vrndp */
22582
22583 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22584 vrndp_f32 (float32x2_t __a)
22585 {
22586 return __builtin_aarch64_ceilv2sf (__a);
22587 }
22588
22589 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22590 vrndpq_f32 (float32x4_t __a)
22591 {
22592 return __builtin_aarch64_ceilv4sf (__a);
22593 }
22594
22595 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22596 vrndpq_f64 (float64x2_t __a)
22597 {
22598 return __builtin_aarch64_ceilv2df (__a);
22599 }
22600
22601 /* vrndx */
22602
22603 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22604 vrndx_f32 (float32x2_t __a)
22605 {
22606 return __builtin_aarch64_rintv2sf (__a);
22607 }
22608
22609 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22610 vrndxq_f32 (float32x4_t __a)
22611 {
22612 return __builtin_aarch64_rintv4sf (__a);
22613 }
22614
22615 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22616 vrndxq_f64 (float64x2_t __a)
22617 {
22618 return __builtin_aarch64_rintv2df (__a);
22619 }
22620
22621 /* vrshl */
22622
22623 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22624 vrshl_s8 (int8x8_t __a, int8x8_t __b)
22625 {
22626 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
22627 }
22628
22629 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22630 vrshl_s16 (int16x4_t __a, int16x4_t __b)
22631 {
22632 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
22633 }
22634
22635 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22636 vrshl_s32 (int32x2_t __a, int32x2_t __b)
22637 {
22638 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
22639 }
22640
22641 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22642 vrshl_s64 (int64x1_t __a, int64x1_t __b)
22643 {
22644 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
22645 }
22646
22647 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22648 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
22649 {
22650 return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
22651 }
22652
22653 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22654 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
22655 {
22656 return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
22657 }
22658
22659 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22660 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
22661 {
22662 return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
22663 }
22664
22665 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22666 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
22667 {
22668 return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
22669 }
22670
22671 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22672 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
22673 {
22674 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
22675 }
22676
22677 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22678 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
22679 {
22680 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
22681 }
22682
22683 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22684 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
22685 {
22686 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
22687 }
22688
22689 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22690 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
22691 {
22692 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
22693 }
22694
22695 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22696 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
22697 {
22698 return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
22699 }
22700
22701 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22702 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
22703 {
22704 return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
22705 }
22706
22707 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22708 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
22709 {
22710 return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
22711 }
22712
22713 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22714 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
22715 {
22716 return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
22717 }
22718
22719 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22720 vrshld_s64 (int64x1_t __a, int64x1_t __b)
22721 {
22722 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
22723 }
22724
22725 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22726 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
22727 {
22728 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
22729 }
22730
22731 /* vrshr */
22732
22733 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22734 vrshr_n_s8 (int8x8_t __a, const int __b)
22735 {
22736 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
22737 }
22738
22739 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22740 vrshr_n_s16 (int16x4_t __a, const int __b)
22741 {
22742 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
22743 }
22744
22745 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22746 vrshr_n_s32 (int32x2_t __a, const int __b)
22747 {
22748 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
22749 }
22750
22751 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22752 vrshr_n_s64 (int64x1_t __a, const int __b)
22753 {
22754 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
22755 }
22756
22757 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22758 vrshr_n_u8 (uint8x8_t __a, const int __b)
22759 {
22760 return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
22761 }
22762
22763 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22764 vrshr_n_u16 (uint16x4_t __a, const int __b)
22765 {
22766 return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
22767 }
22768
22769 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22770 vrshr_n_u32 (uint32x2_t __a, const int __b)
22771 {
22772 return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
22773 }
22774
22775 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22776 vrshr_n_u64 (uint64x1_t __a, const int __b)
22777 {
22778 return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
22779 }
22780
22781 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22782 vrshrq_n_s8 (int8x16_t __a, const int __b)
22783 {
22784 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
22785 }
22786
22787 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22788 vrshrq_n_s16 (int16x8_t __a, const int __b)
22789 {
22790 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
22791 }
22792
22793 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22794 vrshrq_n_s32 (int32x4_t __a, const int __b)
22795 {
22796 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
22797 }
22798
22799 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22800 vrshrq_n_s64 (int64x2_t __a, const int __b)
22801 {
22802 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
22803 }
22804
22805 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22806 vrshrq_n_u8 (uint8x16_t __a, const int __b)
22807 {
22808 return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
22809 }
22810
22811 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22812 vrshrq_n_u16 (uint16x8_t __a, const int __b)
22813 {
22814 return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
22815 }
22816
22817 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22818 vrshrq_n_u32 (uint32x4_t __a, const int __b)
22819 {
22820 return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
22821 }
22822
22823 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22824 vrshrq_n_u64 (uint64x2_t __a, const int __b)
22825 {
22826 return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
22827 }
22828
22829 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22830 vrshrd_n_s64 (int64x1_t __a, const int __b)
22831 {
22832 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
22833 }
22834
22835 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22836 vrshrd_n_u64 (uint64x1_t __a, const int __b)
22837 {
22838 return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
22839 }
22840
22841 /* vrsra */
22842
22843 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22844 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22845 {
22846 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
22847 }
22848
22849 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22850 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22851 {
22852 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
22853 }
22854
22855 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22856 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22857 {
22858 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
22859 }
22860
22861 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22862 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22863 {
22864 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
22865 }
22866
22867 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22868 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22869 {
22870 return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
22871 (int8x8_t) __b, __c);
22872 }
22873
22874 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22875 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22876 {
22877 return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
22878 (int16x4_t) __b, __c);
22879 }
22880
22881 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22882 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22883 {
22884 return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
22885 (int32x2_t) __b, __c);
22886 }
22887
22888 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22889 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22890 {
22891 return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
22892 (int64x1_t) __b, __c);
22893 }
22894
22895 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22896 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22897 {
22898 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
22899 }
22900
22901 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22902 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22903 {
22904 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
22905 }
22906
22907 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22908 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22909 {
22910 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
22911 }
22912
22913 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22914 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22915 {
22916 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
22917 }
22918
22919 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22920 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22921 {
22922 return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
22923 (int8x16_t) __b, __c);
22924 }
22925
22926 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22927 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22928 {
22929 return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
22930 (int16x8_t) __b, __c);
22931 }
22932
22933 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22934 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22935 {
22936 return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
22937 (int32x4_t) __b, __c);
22938 }
22939
22940 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22941 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22942 {
22943 return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
22944 (int64x2_t) __b, __c);
22945 }
22946
22947 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22948 vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22949 {
22950 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
22951 }
22952
22953 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22954 vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22955 {
22956 return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
22957 }
22958
22959 #ifdef __ARM_FEATURE_CRYPTO
22960
22961 /* vsha1 */
22962
22963 static __inline uint32x4_t
22964 vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
22965 {
22966 return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
22967 }
22968 static __inline uint32x4_t
22969 vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
22970 {
22971 return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
22972 }
22973 static __inline uint32x4_t
22974 vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
22975 {
22976 return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
22977 }
22978
22979 static __inline uint32_t
22980 vsha1h_u32 (uint32_t hash_e)
22981 {
22982 return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
22983 }
22984
22985 static __inline uint32x4_t
22986 vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
22987 {
22988 return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
22989 }
22990
22991 static __inline uint32x4_t
22992 vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
22993 {
22994 return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
22995 }
22996
22997 static __inline uint32x4_t
22998 vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
22999 {
23000 return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
23001 }
23002
23003 static __inline uint32x4_t
23004 vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
23005 {
23006 return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
23007 }
23008
23009 static __inline uint32x4_t
23010 vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
23011 {
23012 return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
23013 }
23014
23015 static __inline uint32x4_t
23016 vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
23017 {
23018 return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
23019 }
23020
23021 static __inline poly128_t
23022 vmull_p64 (poly64_t a, poly64_t b)
23023 {
23024 return
23025 __builtin_aarch64_crypto_pmulldi_ppp (a, b);
23026 }
23027
23028 static __inline poly128_t
23029 vmull_high_p64 (poly64x2_t a, poly64x2_t b)
23030 {
23031 return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
23032 }
23033
23034 #endif
23035
23036 /* vshl */
23037
23038 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23039 vshl_n_s8 (int8x8_t __a, const int __b)
23040 {
23041 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
23042 }
23043
23044 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23045 vshl_n_s16 (int16x4_t __a, const int __b)
23046 {
23047 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
23048 }
23049
23050 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23051 vshl_n_s32 (int32x2_t __a, const int __b)
23052 {
23053 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
23054 }
23055
23056 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23057 vshl_n_s64 (int64x1_t __a, const int __b)
23058 {
23059 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23060 }
23061
23062 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23063 vshl_n_u8 (uint8x8_t __a, const int __b)
23064 {
23065 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
23066 }
23067
23068 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23069 vshl_n_u16 (uint16x4_t __a, const int __b)
23070 {
23071 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
23072 }
23073
23074 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23075 vshl_n_u32 (uint32x2_t __a, const int __b)
23076 {
23077 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
23078 }
23079
23080 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23081 vshl_n_u64 (uint64x1_t __a, const int __b)
23082 {
23083 return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
23084 }
23085
23086 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23087 vshlq_n_s8 (int8x16_t __a, const int __b)
23088 {
23089 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
23090 }
23091
23092 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23093 vshlq_n_s16 (int16x8_t __a, const int __b)
23094 {
23095 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
23096 }
23097
23098 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23099 vshlq_n_s32 (int32x4_t __a, const int __b)
23100 {
23101 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
23102 }
23103
23104 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23105 vshlq_n_s64 (int64x2_t __a, const int __b)
23106 {
23107 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
23108 }
23109
23110 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23111 vshlq_n_u8 (uint8x16_t __a, const int __b)
23112 {
23113 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
23114 }
23115
23116 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23117 vshlq_n_u16 (uint16x8_t __a, const int __b)
23118 {
23119 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
23120 }
23121
23122 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23123 vshlq_n_u32 (uint32x4_t __a, const int __b)
23124 {
23125 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
23126 }
23127
23128 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23129 vshlq_n_u64 (uint64x2_t __a, const int __b)
23130 {
23131 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
23132 }
23133
23134 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23135 vshld_n_s64 (int64x1_t __a, const int __b)
23136 {
23137 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23138 }
23139
23140 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23141 vshld_n_u64 (uint64x1_t __a, const int __b)
23142 {
23143 return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
23144 }
23145
23146 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23147 vshl_s8 (int8x8_t __a, int8x8_t __b)
23148 {
23149 return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
23150 }
23151
23152 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23153 vshl_s16 (int16x4_t __a, int16x4_t __b)
23154 {
23155 return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
23156 }
23157
23158 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23159 vshl_s32 (int32x2_t __a, int32x2_t __b)
23160 {
23161 return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
23162 }
23163
23164 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23165 vshl_s64 (int64x1_t __a, int64x1_t __b)
23166 {
23167 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23168 }
23169
23170 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23171 vshl_u8 (uint8x8_t __a, int8x8_t __b)
23172 {
23173 return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
23174 }
23175
23176 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23177 vshl_u16 (uint16x4_t __a, int16x4_t __b)
23178 {
23179 return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
23180 }
23181
23182 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23183 vshl_u32 (uint32x2_t __a, int32x2_t __b)
23184 {
23185 return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
23186 }
23187
23188 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23189 vshl_u64 (uint64x1_t __a, int64x1_t __b)
23190 {
23191 return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
23192 }
23193
23194 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23195 vshlq_s8 (int8x16_t __a, int8x16_t __b)
23196 {
23197 return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
23198 }
23199
23200 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23201 vshlq_s16 (int16x8_t __a, int16x8_t __b)
23202 {
23203 return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
23204 }
23205
23206 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23207 vshlq_s32 (int32x4_t __a, int32x4_t __b)
23208 {
23209 return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
23210 }
23211
23212 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23213 vshlq_s64 (int64x2_t __a, int64x2_t __b)
23214 {
23215 return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
23216 }
23217
23218 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23219 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
23220 {
23221 return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
23222 }
23223
23224 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23225 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
23226 {
23227 return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
23228 }
23229
23230 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23231 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
23232 {
23233 return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
23234 }
23235
23236 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23237 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
23238 {
23239 return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
23240 }
23241
23242 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23243 vshld_s64 (int64x1_t __a, int64x1_t __b)
23244 {
23245 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23246 }
23247
23248 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23249 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
23250 {
23251 return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
23252 }
23253
23254 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23255 vshll_high_n_s8 (int8x16_t __a, const int __b)
23256 {
23257 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
23258 }
23259
23260 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23261 vshll_high_n_s16 (int16x8_t __a, const int __b)
23262 {
23263 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
23264 }
23265
23266 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23267 vshll_high_n_s32 (int32x4_t __a, const int __b)
23268 {
23269 return __builtin_aarch64_sshll2_nv4si (__a, __b);
23270 }
23271
23272 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23273 vshll_high_n_u8 (uint8x16_t __a, const int __b)
23274 {
23275 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
23276 }
23277
23278 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23279 vshll_high_n_u16 (uint16x8_t __a, const int __b)
23280 {
23281 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
23282 }
23283
23284 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23285 vshll_high_n_u32 (uint32x4_t __a, const int __b)
23286 {
23287 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
23288 }
23289
23290 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23291 vshll_n_s8 (int8x8_t __a, const int __b)
23292 {
23293 return __builtin_aarch64_sshll_nv8qi (__a, __b);
23294 }
23295
23296 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23297 vshll_n_s16 (int16x4_t __a, const int __b)
23298 {
23299 return __builtin_aarch64_sshll_nv4hi (__a, __b);
23300 }
23301
23302 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23303 vshll_n_s32 (int32x2_t __a, const int __b)
23304 {
23305 return __builtin_aarch64_sshll_nv2si (__a, __b);
23306 }
23307
23308 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23309 vshll_n_u8 (uint8x8_t __a, const int __b)
23310 {
23311 return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
23312 }
23313
23314 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23315 vshll_n_u16 (uint16x4_t __a, const int __b)
23316 {
23317 return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
23318 }
23319
23320 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23321 vshll_n_u32 (uint32x2_t __a, const int __b)
23322 {
23323 return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
23324 }
23325
23326 /* vshr */
23327
23328 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23329 vshr_n_s8 (int8x8_t __a, const int __b)
23330 {
23331 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
23332 }
23333
23334 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23335 vshr_n_s16 (int16x4_t __a, const int __b)
23336 {
23337 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
23338 }
23339
23340 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23341 vshr_n_s32 (int32x2_t __a, const int __b)
23342 {
23343 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
23344 }
23345
23346 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23347 vshr_n_s64 (int64x1_t __a, const int __b)
23348 {
23349 return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
23350 }
23351
23352 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23353 vshr_n_u8 (uint8x8_t __a, const int __b)
23354 {
23355 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
23356 }
23357
23358 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23359 vshr_n_u16 (uint16x4_t __a, const int __b)
23360 {
23361 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
23362 }
23363
23364 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23365 vshr_n_u32 (uint32x2_t __a, const int __b)
23366 {
23367 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
23368 }
23369
23370 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23371 vshr_n_u64 (uint64x1_t __a, const int __b)
23372 {
23373 return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
23374 }
23375
23376 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23377 vshrq_n_s8 (int8x16_t __a, const int __b)
23378 {
23379 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
23380 }
23381
23382 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23383 vshrq_n_s16 (int16x8_t __a, const int __b)
23384 {
23385 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
23386 }
23387
23388 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23389 vshrq_n_s32 (int32x4_t __a, const int __b)
23390 {
23391 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
23392 }
23393
23394 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23395 vshrq_n_s64 (int64x2_t __a, const int __b)
23396 {
23397 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
23398 }
23399
23400 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23401 vshrq_n_u8 (uint8x16_t __a, const int __b)
23402 {
23403 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
23404 }
23405
23406 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23407 vshrq_n_u16 (uint16x8_t __a, const int __b)
23408 {
23409 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
23410 }
23411
23412 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23413 vshrq_n_u32 (uint32x4_t __a, const int __b)
23414 {
23415 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
23416 }
23417
23418 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23419 vshrq_n_u64 (uint64x2_t __a, const int __b)
23420 {
23421 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
23422 }
23423
23424 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23425 vshrd_n_s64 (int64x1_t __a, const int __b)
23426 {
23427 return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
23428 }
23429
23430 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23431 vshrd_n_u64 (uint64x1_t __a, const int __b)
23432 {
23433 return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
23434 }
23435
23436 /* vsli */
23437
23438 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23439 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23440 {
23441 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
23442 }
23443
23444 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23445 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23446 {
23447 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
23448 }
23449
23450 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23451 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23452 {
23453 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
23454 }
23455
23456 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23457 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23458 {
23459 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23460 }
23461
23462 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23463 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23464 {
23465 return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
23466 (int8x8_t) __b, __c);
23467 }
23468
23469 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23470 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23471 {
23472 return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
23473 (int16x4_t) __b, __c);
23474 }
23475
23476 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23477 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23478 {
23479 return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
23480 (int32x2_t) __b, __c);
23481 }
23482
23483 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23484 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23485 {
23486 return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
23487 (int64x1_t) __b, __c);
23488 }
23489
23490 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23491 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23492 {
23493 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
23494 }
23495
23496 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23497 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23498 {
23499 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
23500 }
23501
23502 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23503 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23504 {
23505 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
23506 }
23507
23508 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23509 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23510 {
23511 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
23512 }
23513
23514 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23515 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23516 {
23517 return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
23518 (int8x16_t) __b, __c);
23519 }
23520
23521 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23522 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23523 {
23524 return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
23525 (int16x8_t) __b, __c);
23526 }
23527
23528 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23529 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23530 {
23531 return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
23532 (int32x4_t) __b, __c);
23533 }
23534
23535 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23536 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23537 {
23538 return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
23539 (int64x2_t) __b, __c);
23540 }
23541
23542 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23543 vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23544 {
23545 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23546 }
23547
23548 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23549 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23550 {
23551 return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
23552 }
23553
23554 /* vsqadd */
23555
23556 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23557 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
23558 {
23559 return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
23560 (int8x8_t) __b);
23561 }
23562
23563 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23564 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
23565 {
23566 return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
23567 (int16x4_t) __b);
23568 }
23569
23570 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23571 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
23572 {
23573 return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
23574 (int32x2_t) __b);
23575 }
23576
23577 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23578 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
23579 {
23580 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23581 }
23582
23583 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23584 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
23585 {
23586 return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
23587 (int8x16_t) __b);
23588 }
23589
23590 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23591 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
23592 {
23593 return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
23594 (int16x8_t) __b);
23595 }
23596
23597 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23598 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
23599 {
23600 return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
23601 (int32x4_t) __b);
23602 }
23603
23604 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23605 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
23606 {
23607 return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
23608 (int64x2_t) __b);
23609 }
23610
23611 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
23612 vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
23613 {
23614 return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
23615 }
23616
23617 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
23618 vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
23619 {
23620 return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
23621 }
23622
23623 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
23624 vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
23625 {
23626 return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
23627 }
23628
23629 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23630 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
23631 {
23632 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23633 }
23634
23635 /* vsqrt */
23636 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23637 vsqrt_f32 (float32x2_t a)
23638 {
23639 return __builtin_aarch64_sqrtv2sf (a);
23640 }
23641
23642 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23643 vsqrtq_f32 (float32x4_t a)
23644 {
23645 return __builtin_aarch64_sqrtv4sf (a);
23646 }
23647
23648 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23649 vsqrtq_f64 (float64x2_t a)
23650 {
23651 return __builtin_aarch64_sqrtv2df (a);
23652 }
23653
23654 /* vsra */
23655
23656 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23657 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23658 {
23659 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
23660 }
23661
23662 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23663 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23664 {
23665 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
23666 }
23667
23668 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23669 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23670 {
23671 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
23672 }
23673
23674 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23675 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23676 {
23677 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23678 }
23679
23680 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23681 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23682 {
23683 return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
23684 (int8x8_t) __b, __c);
23685 }
23686
23687 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23688 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23689 {
23690 return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
23691 (int16x4_t) __b, __c);
23692 }
23693
23694 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23695 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23696 {
23697 return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
23698 (int32x2_t) __b, __c);
23699 }
23700
23701 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23702 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23703 {
23704 return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
23705 (int64x1_t) __b, __c);
23706 }
23707
23708 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23709 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23710 {
23711 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
23712 }
23713
23714 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23715 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23716 {
23717 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
23718 }
23719
23720 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23721 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23722 {
23723 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
23724 }
23725
23726 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23727 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23728 {
23729 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
23730 }
23731
23732 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23733 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23734 {
23735 return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
23736 (int8x16_t) __b, __c);
23737 }
23738
23739 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23740 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23741 {
23742 return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
23743 (int16x8_t) __b, __c);
23744 }
23745
23746 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23747 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23748 {
23749 return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
23750 (int32x4_t) __b, __c);
23751 }
23752
23753 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23754 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23755 {
23756 return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
23757 (int64x2_t) __b, __c);
23758 }
23759
23760 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23761 vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23762 {
23763 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23764 }
23765
23766 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23767 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23768 {
23769 return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
23770 }
23771
23772 /* vsri */
23773
23774 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23775 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23776 {
23777 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
23778 }
23779
23780 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23781 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23782 {
23783 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
23784 }
23785
23786 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23787 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23788 {
23789 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
23790 }
23791
23792 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23793 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23794 {
23795 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
23796 }
23797
23798 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23799 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23800 {
23801 return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
23802 (int8x8_t) __b, __c);
23803 }
23804
23805 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23806 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23807 {
23808 return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
23809 (int16x4_t) __b, __c);
23810 }
23811
23812 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23813 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23814 {
23815 return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
23816 (int32x2_t) __b, __c);
23817 }
23818
23819 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23820 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23821 {
23822 return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
23823 (int64x1_t) __b, __c);
23824 }
23825
23826 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23827 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23828 {
23829 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
23830 }
23831
23832 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23833 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23834 {
23835 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
23836 }
23837
23838 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23839 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23840 {
23841 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
23842 }
23843
23844 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23845 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23846 {
23847 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
23848 }
23849
23850 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23851 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23852 {
23853 return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
23854 (int8x16_t) __b, __c);
23855 }
23856
23857 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23858 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23859 {
23860 return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
23861 (int16x8_t) __b, __c);
23862 }
23863
23864 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23865 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23866 {
23867 return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
23868 (int32x4_t) __b, __c);
23869 }
23870
23871 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23872 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23873 {
23874 return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
23875 (int64x2_t) __b, __c);
23876 }
23877
23878 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23879 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23880 {
23881 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
23882 }
23883
23884 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23885 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23886 {
23887 return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
23888 }
23889
23890 /* vst1 */
23891
23892 __extension__ static __inline void __attribute__ ((__always_inline__))
23893 vst1_f32 (float32_t *a, float32x2_t b)
23894 {
23895 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
23896 }
23897
23898 __extension__ static __inline void __attribute__ ((__always_inline__))
23899 vst1_f64 (float64_t *a, float64x1_t b)
23900 {
23901 *a = b;
23902 }
23903
23904 __extension__ static __inline void __attribute__ ((__always_inline__))
23905 vst1_p8 (poly8_t *a, poly8x8_t b)
23906 {
23907 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23908 (int8x8_t) b);
23909 }
23910
23911 __extension__ static __inline void __attribute__ ((__always_inline__))
23912 vst1_p16 (poly16_t *a, poly16x4_t b)
23913 {
23914 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23915 (int16x4_t) b);
23916 }
23917
23918 __extension__ static __inline void __attribute__ ((__always_inline__))
23919 vst1_s8 (int8_t *a, int8x8_t b)
23920 {
23921 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
23922 }
23923
23924 __extension__ static __inline void __attribute__ ((__always_inline__))
23925 vst1_s16 (int16_t *a, int16x4_t b)
23926 {
23927 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
23928 }
23929
23930 __extension__ static __inline void __attribute__ ((__always_inline__))
23931 vst1_s32 (int32_t *a, int32x2_t b)
23932 {
23933 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
23934 }
23935
23936 __extension__ static __inline void __attribute__ ((__always_inline__))
23937 vst1_s64 (int64_t *a, int64x1_t b)
23938 {
23939 *a = b;
23940 }
23941
23942 __extension__ static __inline void __attribute__ ((__always_inline__))
23943 vst1_u8 (uint8_t *a, uint8x8_t b)
23944 {
23945 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23946 (int8x8_t) b);
23947 }
23948
23949 __extension__ static __inline void __attribute__ ((__always_inline__))
23950 vst1_u16 (uint16_t *a, uint16x4_t b)
23951 {
23952 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23953 (int16x4_t) b);
23954 }
23955
23956 __extension__ static __inline void __attribute__ ((__always_inline__))
23957 vst1_u32 (uint32_t *a, uint32x2_t b)
23958 {
23959 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
23960 (int32x2_t) b);
23961 }
23962
23963 __extension__ static __inline void __attribute__ ((__always_inline__))
23964 vst1_u64 (uint64_t *a, uint64x1_t b)
23965 {
23966 *a = b;
23967 }
23968
23969 __extension__ static __inline void __attribute__ ((__always_inline__))
23970 vst1q_f32 (float32_t *a, float32x4_t b)
23971 {
23972 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
23973 }
23974
23975 __extension__ static __inline void __attribute__ ((__always_inline__))
23976 vst1q_f64 (float64_t *a, float64x2_t b)
23977 {
23978 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
23979 }
23980
23981 /* vst1q */
23982
23983 __extension__ static __inline void __attribute__ ((__always_inline__))
23984 vst1q_p8 (poly8_t *a, poly8x16_t b)
23985 {
23986 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
23987 (int8x16_t) b);
23988 }
23989
23990 __extension__ static __inline void __attribute__ ((__always_inline__))
23991 vst1q_p16 (poly16_t *a, poly16x8_t b)
23992 {
23993 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
23994 (int16x8_t) b);
23995 }
23996
23997 __extension__ static __inline void __attribute__ ((__always_inline__))
23998 vst1q_s8 (int8_t *a, int8x16_t b)
23999 {
24000 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
24001 }
24002
24003 __extension__ static __inline void __attribute__ ((__always_inline__))
24004 vst1q_s16 (int16_t *a, int16x8_t b)
24005 {
24006 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
24007 }
24008
24009 __extension__ static __inline void __attribute__ ((__always_inline__))
24010 vst1q_s32 (int32_t *a, int32x4_t b)
24011 {
24012 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
24013 }
24014
24015 __extension__ static __inline void __attribute__ ((__always_inline__))
24016 vst1q_s64 (int64_t *a, int64x2_t b)
24017 {
24018 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
24019 }
24020
24021 __extension__ static __inline void __attribute__ ((__always_inline__))
24022 vst1q_u8 (uint8_t *a, uint8x16_t b)
24023 {
24024 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24025 (int8x16_t) b);
24026 }
24027
24028 __extension__ static __inline void __attribute__ ((__always_inline__))
24029 vst1q_u16 (uint16_t *a, uint16x8_t b)
24030 {
24031 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24032 (int16x8_t) b);
24033 }
24034
24035 __extension__ static __inline void __attribute__ ((__always_inline__))
24036 vst1q_u32 (uint32_t *a, uint32x4_t b)
24037 {
24038 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
24039 (int32x4_t) b);
24040 }
24041
24042 __extension__ static __inline void __attribute__ ((__always_inline__))
24043 vst1q_u64 (uint64_t *a, uint64x2_t b)
24044 {
24045 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
24046 (int64x2_t) b);
24047 }
24048
24049 /* vstn */
24050
24051 __extension__ static __inline void
24052 vst2_s64 (int64_t * __a, int64x1x2_t val)
24053 {
24054 __builtin_aarch64_simd_oi __o;
24055 int64x2x2_t temp;
24056 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24057 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24058 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24059 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24060 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24061 }
24062
24063 __extension__ static __inline void
24064 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
24065 {
24066 __builtin_aarch64_simd_oi __o;
24067 uint64x2x2_t temp;
24068 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24069 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24070 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24071 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24072 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24073 }
24074
24075 __extension__ static __inline void
24076 vst2_f64 (float64_t * __a, float64x1x2_t val)
24077 {
24078 __builtin_aarch64_simd_oi __o;
24079 float64x2x2_t temp;
24080 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24081 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24082 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24083 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24084 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24085 }
24086
24087 __extension__ static __inline void
24088 vst2_s8 (int8_t * __a, int8x8x2_t val)
24089 {
24090 __builtin_aarch64_simd_oi __o;
24091 int8x16x2_t temp;
24092 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24093 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24094 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24095 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24096 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24097 }
24098
24099 __extension__ static __inline void __attribute__ ((__always_inline__))
24100 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24101 {
24102 __builtin_aarch64_simd_oi __o;
24103 poly8x16x2_t temp;
24104 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24105 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24106 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24107 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24108 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24109 }
24110
24111 __extension__ static __inline void __attribute__ ((__always_inline__))
24112 vst2_s16 (int16_t * __a, int16x4x2_t val)
24113 {
24114 __builtin_aarch64_simd_oi __o;
24115 int16x8x2_t temp;
24116 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24117 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24118 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24119 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24120 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24121 }
24122
24123 __extension__ static __inline void __attribute__ ((__always_inline__))
24124 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24125 {
24126 __builtin_aarch64_simd_oi __o;
24127 poly16x8x2_t temp;
24128 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24129 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24130 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24131 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24132 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24133 }
24134
24135 __extension__ static __inline void __attribute__ ((__always_inline__))
24136 vst2_s32 (int32_t * __a, int32x2x2_t val)
24137 {
24138 __builtin_aarch64_simd_oi __o;
24139 int32x4x2_t temp;
24140 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24141 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24142 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24143 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24144 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24145 }
24146
24147 __extension__ static __inline void __attribute__ ((__always_inline__))
24148 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24149 {
24150 __builtin_aarch64_simd_oi __o;
24151 uint8x16x2_t temp;
24152 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24153 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24154 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24155 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24156 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24157 }
24158
24159 __extension__ static __inline void __attribute__ ((__always_inline__))
24160 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24161 {
24162 __builtin_aarch64_simd_oi __o;
24163 uint16x8x2_t temp;
24164 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24165 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24166 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24167 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24168 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24169 }
24170
24171 __extension__ static __inline void __attribute__ ((__always_inline__))
24172 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24173 {
24174 __builtin_aarch64_simd_oi __o;
24175 uint32x4x2_t temp;
24176 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24177 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24178 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24179 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24180 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24181 }
24182
24183 __extension__ static __inline void __attribute__ ((__always_inline__))
24184 vst2_f32 (float32_t * __a, float32x2x2_t val)
24185 {
24186 __builtin_aarch64_simd_oi __o;
24187 float32x4x2_t temp;
24188 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24189 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24190 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24191 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24192 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24193 }
24194
24195 __extension__ static __inline void __attribute__ ((__always_inline__))
24196 vst2q_s8 (int8_t * __a, int8x16x2_t val)
24197 {
24198 __builtin_aarch64_simd_oi __o;
24199 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24200 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24201 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24202 }
24203
24204 __extension__ static __inline void __attribute__ ((__always_inline__))
24205 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24206 {
24207 __builtin_aarch64_simd_oi __o;
24208 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24209 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24210 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24211 }
24212
24213 __extension__ static __inline void __attribute__ ((__always_inline__))
24214 vst2q_s16 (int16_t * __a, int16x8x2_t val)
24215 {
24216 __builtin_aarch64_simd_oi __o;
24217 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24218 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24219 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24220 }
24221
24222 __extension__ static __inline void __attribute__ ((__always_inline__))
24223 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24224 {
24225 __builtin_aarch64_simd_oi __o;
24226 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24227 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24228 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24229 }
24230
24231 __extension__ static __inline void __attribute__ ((__always_inline__))
24232 vst2q_s32 (int32_t * __a, int32x4x2_t val)
24233 {
24234 __builtin_aarch64_simd_oi __o;
24235 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24236 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24237 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24238 }
24239
24240 __extension__ static __inline void __attribute__ ((__always_inline__))
24241 vst2q_s64 (int64_t * __a, int64x2x2_t val)
24242 {
24243 __builtin_aarch64_simd_oi __o;
24244 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24245 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24246 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24247 }
24248
24249 __extension__ static __inline void __attribute__ ((__always_inline__))
24250 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24251 {
24252 __builtin_aarch64_simd_oi __o;
24253 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24254 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24255 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24256 }
24257
24258 __extension__ static __inline void __attribute__ ((__always_inline__))
24259 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24260 {
24261 __builtin_aarch64_simd_oi __o;
24262 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24263 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24264 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24265 }
24266
24267 __extension__ static __inline void __attribute__ ((__always_inline__))
24268 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24269 {
24270 __builtin_aarch64_simd_oi __o;
24271 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24272 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24273 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24274 }
24275
24276 __extension__ static __inline void __attribute__ ((__always_inline__))
24277 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24278 {
24279 __builtin_aarch64_simd_oi __o;
24280 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24281 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24282 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24283 }
24284
24285 __extension__ static __inline void __attribute__ ((__always_inline__))
24286 vst2q_f32 (float32_t * __a, float32x4x2_t val)
24287 {
24288 __builtin_aarch64_simd_oi __o;
24289 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24290 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24291 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24292 }
24293
24294 __extension__ static __inline void __attribute__ ((__always_inline__))
24295 vst2q_f64 (float64_t * __a, float64x2x2_t val)
24296 {
24297 __builtin_aarch64_simd_oi __o;
24298 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24299 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24300 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24301 }
24302
24303 __extension__ static __inline void
24304 vst3_s64 (int64_t * __a, int64x1x3_t val)
24305 {
24306 __builtin_aarch64_simd_ci __o;
24307 int64x2x3_t temp;
24308 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24309 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24310 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24311 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24312 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24313 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24314 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24315 }
24316
24317 __extension__ static __inline void
24318 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24319 {
24320 __builtin_aarch64_simd_ci __o;
24321 uint64x2x3_t temp;
24322 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24323 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24324 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24325 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24326 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24327 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24328 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24329 }
24330
24331 __extension__ static __inline void
24332 vst3_f64 (float64_t * __a, float64x1x3_t val)
24333 {
24334 __builtin_aarch64_simd_ci __o;
24335 float64x2x3_t temp;
24336 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24337 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24338 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24339 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24340 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24341 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24342 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24343 }
24344
24345 __extension__ static __inline void
24346 vst3_s8 (int8_t * __a, int8x8x3_t val)
24347 {
24348 __builtin_aarch64_simd_ci __o;
24349 int8x16x3_t temp;
24350 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24351 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24352 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24353 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24354 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24355 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24356 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24357 }
24358
24359 __extension__ static __inline void __attribute__ ((__always_inline__))
24360 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
24361 {
24362 __builtin_aarch64_simd_ci __o;
24363 poly8x16x3_t temp;
24364 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24365 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24366 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24367 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24368 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24369 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24370 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24371 }
24372
24373 __extension__ static __inline void __attribute__ ((__always_inline__))
24374 vst3_s16 (int16_t * __a, int16x4x3_t val)
24375 {
24376 __builtin_aarch64_simd_ci __o;
24377 int16x8x3_t temp;
24378 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24379 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24380 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24381 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24382 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24383 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24384 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24385 }
24386
24387 __extension__ static __inline void __attribute__ ((__always_inline__))
24388 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
24389 {
24390 __builtin_aarch64_simd_ci __o;
24391 poly16x8x3_t temp;
24392 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24393 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24394 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24395 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24396 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24397 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24398 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24399 }
24400
24401 __extension__ static __inline void __attribute__ ((__always_inline__))
24402 vst3_s32 (int32_t * __a, int32x2x3_t val)
24403 {
24404 __builtin_aarch64_simd_ci __o;
24405 int32x4x3_t temp;
24406 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24407 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24408 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24409 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24410 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24411 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24412 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24413 }
24414
24415 __extension__ static __inline void __attribute__ ((__always_inline__))
24416 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
24417 {
24418 __builtin_aarch64_simd_ci __o;
24419 uint8x16x3_t temp;
24420 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24421 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24422 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24423 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24424 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24425 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24426 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24427 }
24428
24429 __extension__ static __inline void __attribute__ ((__always_inline__))
24430 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
24431 {
24432 __builtin_aarch64_simd_ci __o;
24433 uint16x8x3_t temp;
24434 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24435 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24436 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24437 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24438 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24439 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24440 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24441 }
24442
24443 __extension__ static __inline void __attribute__ ((__always_inline__))
24444 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
24445 {
24446 __builtin_aarch64_simd_ci __o;
24447 uint32x4x3_t temp;
24448 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24449 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24450 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24451 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24452 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24453 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24454 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24455 }
24456
24457 __extension__ static __inline void __attribute__ ((__always_inline__))
24458 vst3_f32 (float32_t * __a, float32x2x3_t val)
24459 {
24460 __builtin_aarch64_simd_ci __o;
24461 float32x4x3_t temp;
24462 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24463 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24464 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24465 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
24466 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
24467 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
24468 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24469 }
24470
24471 __extension__ static __inline void __attribute__ ((__always_inline__))
24472 vst3q_s8 (int8_t * __a, int8x16x3_t val)
24473 {
24474 __builtin_aarch64_simd_ci __o;
24475 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24476 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24477 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24478 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24479 }
24480
24481 __extension__ static __inline void __attribute__ ((__always_inline__))
24482 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
24483 {
24484 __builtin_aarch64_simd_ci __o;
24485 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24486 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24487 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24488 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24489 }
24490
24491 __extension__ static __inline void __attribute__ ((__always_inline__))
24492 vst3q_s16 (int16_t * __a, int16x8x3_t val)
24493 {
24494 __builtin_aarch64_simd_ci __o;
24495 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24496 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24497 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24498 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24499 }
24500
24501 __extension__ static __inline void __attribute__ ((__always_inline__))
24502 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
24503 {
24504 __builtin_aarch64_simd_ci __o;
24505 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24506 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24507 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24508 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24509 }
24510
24511 __extension__ static __inline void __attribute__ ((__always_inline__))
24512 vst3q_s32 (int32_t * __a, int32x4x3_t val)
24513 {
24514 __builtin_aarch64_simd_ci __o;
24515 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24516 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24517 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24518 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24519 }
24520
24521 __extension__ static __inline void __attribute__ ((__always_inline__))
24522 vst3q_s64 (int64_t * __a, int64x2x3_t val)
24523 {
24524 __builtin_aarch64_simd_ci __o;
24525 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24526 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24527 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24528 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24529 }
24530
24531 __extension__ static __inline void __attribute__ ((__always_inline__))
24532 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
24533 {
24534 __builtin_aarch64_simd_ci __o;
24535 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24536 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24537 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24538 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24539 }
24540
24541 __extension__ static __inline void __attribute__ ((__always_inline__))
24542 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
24543 {
24544 __builtin_aarch64_simd_ci __o;
24545 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24546 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24547 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24548 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24549 }
24550
24551 __extension__ static __inline void __attribute__ ((__always_inline__))
24552 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
24553 {
24554 __builtin_aarch64_simd_ci __o;
24555 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24556 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24557 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24558 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24559 }
24560
24561 __extension__ static __inline void __attribute__ ((__always_inline__))
24562 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
24563 {
24564 __builtin_aarch64_simd_ci __o;
24565 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24566 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24567 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24568 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24569 }
24570
24571 __extension__ static __inline void __attribute__ ((__always_inline__))
24572 vst3q_f32 (float32_t * __a, float32x4x3_t val)
24573 {
24574 __builtin_aarch64_simd_ci __o;
24575 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
24576 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
24577 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
24578 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24579 }
24580
24581 __extension__ static __inline void __attribute__ ((__always_inline__))
24582 vst3q_f64 (float64_t * __a, float64x2x3_t val)
24583 {
24584 __builtin_aarch64_simd_ci __o;
24585 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
24586 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
24587 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
24588 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
24589 }
24590
24591 __extension__ static __inline void
24592 vst4_s64 (int64_t * __a, int64x1x4_t val)
24593 {
24594 __builtin_aarch64_simd_xi __o;
24595 int64x2x4_t temp;
24596 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24597 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24598 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24599 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
24600 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24601 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24602 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24603 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24604 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24605 }
24606
24607 __extension__ static __inline void
24608 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24609 {
24610 __builtin_aarch64_simd_xi __o;
24611 uint64x2x4_t temp;
24612 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24613 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24614 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24615 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
24616 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24617 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24618 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24619 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24620 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24621 }
24622
24623 __extension__ static __inline void
24624 vst4_f64 (float64_t * __a, float64x1x4_t val)
24625 {
24626 __builtin_aarch64_simd_xi __o;
24627 float64x2x4_t temp;
24628 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24629 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24630 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24631 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
24632 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24633 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24634 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24635 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24636 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24637 }
24638
24639 __extension__ static __inline void
24640 vst4_s8 (int8_t * __a, int8x8x4_t val)
24641 {
24642 __builtin_aarch64_simd_xi __o;
24643 int8x16x4_t temp;
24644 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24645 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24646 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24647 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
24648 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24649 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24650 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24651 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24652 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24653 }
24654
24655 __extension__ static __inline void __attribute__ ((__always_inline__))
24656 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
24657 {
24658 __builtin_aarch64_simd_xi __o;
24659 poly8x16x4_t temp;
24660 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24661 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24662 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24663 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
24664 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24665 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24666 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24667 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24668 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24669 }
24670
24671 __extension__ static __inline void __attribute__ ((__always_inline__))
24672 vst4_s16 (int16_t * __a, int16x4x4_t val)
24673 {
24674 __builtin_aarch64_simd_xi __o;
24675 int16x8x4_t temp;
24676 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24677 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24678 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24679 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
24680 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24681 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24682 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24683 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24684 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24685 }
24686
24687 __extension__ static __inline void __attribute__ ((__always_inline__))
24688 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
24689 {
24690 __builtin_aarch64_simd_xi __o;
24691 poly16x8x4_t temp;
24692 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24693 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24694 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24695 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
24696 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24697 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24698 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24699 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24700 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24701 }
24702
24703 __extension__ static __inline void __attribute__ ((__always_inline__))
24704 vst4_s32 (int32_t * __a, int32x2x4_t val)
24705 {
24706 __builtin_aarch64_simd_xi __o;
24707 int32x4x4_t temp;
24708 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24709 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24710 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24711 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
24712 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24713 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24714 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24715 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24716 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24717 }
24718
24719 __extension__ static __inline void __attribute__ ((__always_inline__))
24720 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
24721 {
24722 __builtin_aarch64_simd_xi __o;
24723 uint8x16x4_t temp;
24724 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24725 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24726 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24727 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
24728 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24729 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24730 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24731 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24732 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24733 }
24734
24735 __extension__ static __inline void __attribute__ ((__always_inline__))
24736 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
24737 {
24738 __builtin_aarch64_simd_xi __o;
24739 uint16x8x4_t temp;
24740 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24741 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24742 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24743 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
24744 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24745 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24746 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24747 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24748 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24749 }
24750
24751 __extension__ static __inline void __attribute__ ((__always_inline__))
24752 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
24753 {
24754 __builtin_aarch64_simd_xi __o;
24755 uint32x4x4_t temp;
24756 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24757 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24758 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24759 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
24760 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24761 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24762 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24763 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24764 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24765 }
24766
24767 __extension__ static __inline void __attribute__ ((__always_inline__))
24768 vst4_f32 (float32_t * __a, float32x2x4_t val)
24769 {
24770 __builtin_aarch64_simd_xi __o;
24771 float32x4x4_t temp;
24772 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24773 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24774 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24775 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
24776 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
24777 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
24778 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
24779 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
24780 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24781 }
24782
24783 __extension__ static __inline void __attribute__ ((__always_inline__))
24784 vst4q_s8 (int8_t * __a, int8x16x4_t val)
24785 {
24786 __builtin_aarch64_simd_xi __o;
24787 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24788 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24789 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24790 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24791 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24792 }
24793
24794 __extension__ static __inline void __attribute__ ((__always_inline__))
24795 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
24796 {
24797 __builtin_aarch64_simd_xi __o;
24798 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24799 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24800 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24801 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24802 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24803 }
24804
24805 __extension__ static __inline void __attribute__ ((__always_inline__))
24806 vst4q_s16 (int16_t * __a, int16x8x4_t val)
24807 {
24808 __builtin_aarch64_simd_xi __o;
24809 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24810 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24811 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24812 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24813 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24814 }
24815
24816 __extension__ static __inline void __attribute__ ((__always_inline__))
24817 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
24818 {
24819 __builtin_aarch64_simd_xi __o;
24820 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24821 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24822 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24823 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24824 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24825 }
24826
24827 __extension__ static __inline void __attribute__ ((__always_inline__))
24828 vst4q_s32 (int32_t * __a, int32x4x4_t val)
24829 {
24830 __builtin_aarch64_simd_xi __o;
24831 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
24832 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
24833 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
24834 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
24835 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
24836 }
24837
24838 __extension__ static __inline void __attribute__ ((__always_inline__))
24839 vst4q_s64 (int64_t * __a, int64x2x4_t val)
24840 {
24841 __builtin_aarch64_simd_xi __o;
24842 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
24843 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
24844 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
24845 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
24846 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
24847 }
24848
24849 __extension__ static __inline void __attribute__ ((__always_inline__))
24850 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
24851 {
24852 __builtin_aarch64_simd_xi __o;
24853 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24854 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24855 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24856 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24857 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24858 }
24859
24860 __extension__ static __inline void __attribute__ ((__always_inline__))
24861 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
24862 {
24863 __builtin_aarch64_simd_xi __o;
24864 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24865 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24866 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24867 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24868 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24869 }
24870
24871 __extension__ static __inline void __attribute__ ((__always_inline__))
24872 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
24873 {
24874 __builtin_aarch64_simd_xi __o;
24875 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
24876 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
24877 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
24878 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
24879 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
24880 }
24881
24882 __extension__ static __inline void __attribute__ ((__always_inline__))
24883 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
24884 {
24885 __builtin_aarch64_simd_xi __o;
24886 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
24887 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
24888 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
24889 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
24890 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
24891 }
24892
24893 __extension__ static __inline void __attribute__ ((__always_inline__))
24894 vst4q_f32 (float32_t * __a, float32x4x4_t val)
24895 {
24896 __builtin_aarch64_simd_xi __o;
24897 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
24898 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
24899 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
24900 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
24901 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24902 }
24903
24904 __extension__ static __inline void __attribute__ ((__always_inline__))
24905 vst4q_f64 (float64_t * __a, float64x2x4_t val)
24906 {
24907 __builtin_aarch64_simd_xi __o;
24908 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
24909 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
24910 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
24911 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
24912 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
24913 }
24914
24915 /* vsub */
24916
24917 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24918 vsubd_s64 (int64x1_t __a, int64x1_t __b)
24919 {
24920 return __a - __b;
24921 }
24922
24923 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24924 vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
24925 {
24926 return __a - __b;
24927 }
24928
24929 /* vtbx1 */
24930
24931 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24932 vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
24933 {
24934 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24935 vmov_n_u8 (8));
24936 int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
24937
24938 return vbsl_s8 (__mask, __tbl, __r);
24939 }
24940
24941 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24942 vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
24943 {
24944 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24945 uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
24946
24947 return vbsl_u8 (__mask, __tbl, __r);
24948 }
24949
24950 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24951 vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
24952 {
24953 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24954 poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
24955
24956 return vbsl_p8 (__mask, __tbl, __r);
24957 }
24958
24959 /* vtbx3 */
24960
24961 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24962 vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
24963 {
24964 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24965 vmov_n_u8 (24));
24966 int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
24967
24968 return vbsl_s8 (__mask, __tbl, __r);
24969 }
24970
24971 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24972 vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
24973 {
24974 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24975 uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
24976
24977 return vbsl_u8 (__mask, __tbl, __r);
24978 }
24979
24980 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24981 vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
24982 {
24983 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24984 poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
24985
24986 return vbsl_p8 (__mask, __tbl, __r);
24987 }
24988
24989 /* vtrn */
24990
24991 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
24992 vtrn_f32 (float32x2_t a, float32x2_t b)
24993 {
24994 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
24995 }
24996
24997 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
24998 vtrn_p8 (poly8x8_t a, poly8x8_t b)
24999 {
25000 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
25001 }
25002
25003 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
25004 vtrn_p16 (poly16x4_t a, poly16x4_t b)
25005 {
25006 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
25007 }
25008
25009 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
25010 vtrn_s8 (int8x8_t a, int8x8_t b)
25011 {
25012 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
25013 }
25014
25015 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
25016 vtrn_s16 (int16x4_t a, int16x4_t b)
25017 {
25018 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
25019 }
25020
25021 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
25022 vtrn_s32 (int32x2_t a, int32x2_t b)
25023 {
25024 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
25025 }
25026
25027 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
25028 vtrn_u8 (uint8x8_t a, uint8x8_t b)
25029 {
25030 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
25031 }
25032
25033 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
25034 vtrn_u16 (uint16x4_t a, uint16x4_t b)
25035 {
25036 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
25037 }
25038
25039 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
25040 vtrn_u32 (uint32x2_t a, uint32x2_t b)
25041 {
25042 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
25043 }
25044
25045 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
25046 vtrnq_f32 (float32x4_t a, float32x4_t b)
25047 {
25048 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
25049 }
25050
25051 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
25052 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
25053 {
25054 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
25055 }
25056
25057 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
25058 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
25059 {
25060 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
25061 }
25062
25063 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
25064 vtrnq_s8 (int8x16_t a, int8x16_t b)
25065 {
25066 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
25067 }
25068
25069 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
25070 vtrnq_s16 (int16x8_t a, int16x8_t b)
25071 {
25072 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
25073 }
25074
25075 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
25076 vtrnq_s32 (int32x4_t a, int32x4_t b)
25077 {
25078 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
25079 }
25080
25081 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
25082 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
25083 {
25084 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
25085 }
25086
25087 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
25088 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
25089 {
25090 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
25091 }
25092
25093 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
25094 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
25095 {
25096 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
25097 }
25098
25099 /* vtst */
25100
25101 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25102 vtst_s8 (int8x8_t __a, int8x8_t __b)
25103 {
25104 return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
25105 }
25106
25107 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25108 vtst_s16 (int16x4_t __a, int16x4_t __b)
25109 {
25110 return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
25111 }
25112
25113 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25114 vtst_s32 (int32x2_t __a, int32x2_t __b)
25115 {
25116 return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
25117 }
25118
25119 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25120 vtst_s64 (int64x1_t __a, int64x1_t __b)
25121 {
25122 return (__a & __b) ? -1ll : 0ll;
25123 }
25124
25125 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25126 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
25127 {
25128 return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
25129 (int8x8_t) __b);
25130 }
25131
25132 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25133 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
25134 {
25135 return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
25136 (int16x4_t) __b);
25137 }
25138
25139 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25140 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
25141 {
25142 return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
25143 (int32x2_t) __b);
25144 }
25145
25146 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25147 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
25148 {
25149 return (__a & __b) ? -1ll : 0ll;
25150 }
25151
25152 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25153 vtstq_s8 (int8x16_t __a, int8x16_t __b)
25154 {
25155 return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
25156 }
25157
25158 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25159 vtstq_s16 (int16x8_t __a, int16x8_t __b)
25160 {
25161 return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
25162 }
25163
25164 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25165 vtstq_s32 (int32x4_t __a, int32x4_t __b)
25166 {
25167 return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
25168 }
25169
25170 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25171 vtstq_s64 (int64x2_t __a, int64x2_t __b)
25172 {
25173 return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
25174 }
25175
25176 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25177 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
25178 {
25179 return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
25180 (int8x16_t) __b);
25181 }
25182
25183 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25184 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
25185 {
25186 return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
25187 (int16x8_t) __b);
25188 }
25189
25190 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25191 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
25192 {
25193 return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
25194 (int32x4_t) __b);
25195 }
25196
25197 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25198 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
25199 {
25200 return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
25201 (int64x2_t) __b);
25202 }
25203
25204 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25205 vtstd_s64 (int64x1_t __a, int64x1_t __b)
25206 {
25207 return (__a & __b) ? -1ll : 0ll;
25208 }
25209
25210 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25211 vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
25212 {
25213 return (__a & __b) ? -1ll : 0ll;
25214 }
25215
25216 /* vuqadd */
25217
25218 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25219 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
25220 {
25221 return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
25222 }
25223
25224 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25225 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
25226 {
25227 return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
25228 }
25229
25230 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25231 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
25232 {
25233 return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
25234 }
25235
25236 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25237 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
25238 {
25239 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25240 }
25241
25242 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25243 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
25244 {
25245 return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
25246 }
25247
25248 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25249 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
25250 {
25251 return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
25252 }
25253
25254 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25255 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
25256 {
25257 return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
25258 }
25259
25260 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25261 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
25262 {
25263 return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
25264 }
25265
25266 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
25267 vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
25268 {
25269 return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
25270 }
25271
25272 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
25273 vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
25274 {
25275 return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
25276 }
25277
25278 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
25279 vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
25280 {
25281 return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
25282 }
25283
25284 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25285 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
25286 {
25287 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25288 }
25289
25290 #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
25291 __extension__ static __inline rettype \
25292 __attribute__ ((__always_inline__)) \
25293 v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \
25294 { \
25295 return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \
25296 v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \
25297 }
25298
25299 #define __INTERLEAVE_LIST(op) \
25300 __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \
25301 __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \
25302 __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \
25303 __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \
25304 __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \
25305 __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \
25306 __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \
25307 __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \
25308 __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \
25309 __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \
25310 __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \
25311 __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \
25312 __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \
25313 __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \
25314 __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \
25315 __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \
25316 __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \
25317 __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25318
25319 /* vuzp */
25320
25321 __INTERLEAVE_LIST (uzp)
25322
25323 /* vzip */
25324
25325 __INTERLEAVE_LIST (zip)
25326
25327 #undef __INTERLEAVE_LIST
25328 #undef __DEFINTERLEAVE
25329
25330 /* End of optimal implementations in approved order. */
25331
25332 #undef __LANE0
25333
25334 #undef __aarch64_vget_lane_any
25335 #undef __aarch64_vget_lane_f32
25336 #undef __aarch64_vget_lane_f64
25337 #undef __aarch64_vget_lane_p8
25338 #undef __aarch64_vget_lane_p16
25339 #undef __aarch64_vget_lane_s8
25340 #undef __aarch64_vget_lane_s16
25341 #undef __aarch64_vget_lane_s32
25342 #undef __aarch64_vget_lane_s64
25343 #undef __aarch64_vget_lane_u8
25344 #undef __aarch64_vget_lane_u16
25345 #undef __aarch64_vget_lane_u32
25346 #undef __aarch64_vget_lane_u64
25347
25348 #undef __aarch64_vgetq_lane_f32
25349 #undef __aarch64_vgetq_lane_f64
25350 #undef __aarch64_vgetq_lane_p8
25351 #undef __aarch64_vgetq_lane_p16
25352 #undef __aarch64_vgetq_lane_s8
25353 #undef __aarch64_vgetq_lane_s16
25354 #undef __aarch64_vgetq_lane_s32
25355 #undef __aarch64_vgetq_lane_s64
25356 #undef __aarch64_vgetq_lane_u8
25357 #undef __aarch64_vgetq_lane_u16
25358 #undef __aarch64_vgetq_lane_u32
25359 #undef __aarch64_vgetq_lane_u64
25360
25361 #undef __aarch64_vdup_lane_any
25362 #undef __aarch64_vdup_lane_f32
25363 #undef __aarch64_vdup_lane_f64
25364 #undef __aarch64_vdup_lane_p8
25365 #undef __aarch64_vdup_lane_p16
25366 #undef __aarch64_vdup_lane_s8
25367 #undef __aarch64_vdup_lane_s16
25368 #undef __aarch64_vdup_lane_s32
25369 #undef __aarch64_vdup_lane_s64
25370 #undef __aarch64_vdup_lane_u8
25371 #undef __aarch64_vdup_lane_u16
25372 #undef __aarch64_vdup_lane_u32
25373 #undef __aarch64_vdup_lane_u64
25374 #undef __aarch64_vdup_laneq_f32
25375 #undef __aarch64_vdup_laneq_f64
25376 #undef __aarch64_vdup_laneq_p8
25377 #undef __aarch64_vdup_laneq_p16
25378 #undef __aarch64_vdup_laneq_s8
25379 #undef __aarch64_vdup_laneq_s16
25380 #undef __aarch64_vdup_laneq_s32
25381 #undef __aarch64_vdup_laneq_s64
25382 #undef __aarch64_vdup_laneq_u8
25383 #undef __aarch64_vdup_laneq_u16
25384 #undef __aarch64_vdup_laneq_u32
25385 #undef __aarch64_vdup_laneq_u64
25386 #undef __aarch64_vdupq_lane_f32
25387 #undef __aarch64_vdupq_lane_f64
25388 #undef __aarch64_vdupq_lane_p8
25389 #undef __aarch64_vdupq_lane_p16
25390 #undef __aarch64_vdupq_lane_s8
25391 #undef __aarch64_vdupq_lane_s16
25392 #undef __aarch64_vdupq_lane_s32
25393 #undef __aarch64_vdupq_lane_s64
25394 #undef __aarch64_vdupq_lane_u8
25395 #undef __aarch64_vdupq_lane_u16
25396 #undef __aarch64_vdupq_lane_u32
25397 #undef __aarch64_vdupq_lane_u64
25398 #undef __aarch64_vdupq_laneq_f32
25399 #undef __aarch64_vdupq_laneq_f64
25400 #undef __aarch64_vdupq_laneq_p8
25401 #undef __aarch64_vdupq_laneq_p16
25402 #undef __aarch64_vdupq_laneq_s8
25403 #undef __aarch64_vdupq_laneq_s16
25404 #undef __aarch64_vdupq_laneq_s32
25405 #undef __aarch64_vdupq_laneq_s64
25406 #undef __aarch64_vdupq_laneq_u8
25407 #undef __aarch64_vdupq_laneq_u16
25408 #undef __aarch64_vdupq_laneq_u32
25409 #undef __aarch64_vdupq_laneq_u64
25410
25411 #endif