]>
Commit | Line | Data |
---|---|---|
aed39a3a JR |
1 | /* Utilities for Advanced SIMD libmvec routines. |
2 | Copyright (C) 2023 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
17 | <https://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #ifndef _V_MATH_H | |
20 | #define _V_MATH_H | |
21 | ||
22 | #include <arm_neon.h> | |
23 | #include "vecmath_config.h" | |
24 | ||
/* Attribute selecting the aarch64_vector_pcs calling convention for a
   function declaration.  */
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))

/* Symbol-name builders.  Each pastes a fixed "_ZGVnN..." prefix onto NAME:
   the F variants use a 4 in the prefix and append an "f" suffix, the D
   variants use a 2 and append nothing; the 1/2 variants differ in having
   "v" or "vv" before the underscore.  */
#define V_NAME_F1(name) _ZGVnN4v_##name##f
#define V_NAME_D1(name) _ZGVnN2v_##name
#define V_NAME_F2(name) _ZGVnN4vv_##name##f
#define V_NAME_D2(name) _ZGVnN2vv_##name
31 | ||
/* Brace initialisers that repeat one value 2, 4 or 8 times; handy for
   writing vector constants with every lane equal.  */
#define V2(VAL) { VAL, VAL }
#define V4(VAL) { VAL, VAL, VAL, VAL }
#define V8(VAL) { VAL, VAL, VAL, VAL, VAL, VAL, VAL, VAL }
aed39a3a | 36 | |
5a4b6f8e JR |
37 | static inline int |
38 | v_any_u16h (uint16x4_t x) | |
39 | { | |
40 | return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0; | |
41 | } | |
aed39a3a JR |
42 | |
43 | static inline float32x4_t | |
44 | v_f32 (float x) | |
45 | { | |
46 | return (float32x4_t) V4 (x); | |
47 | } | |
48 | static inline uint32x4_t | |
49 | v_u32 (uint32_t x) | |
50 | { | |
51 | return (uint32x4_t) V4 (x); | |
52 | } | |
53 | static inline int32x4_t | |
54 | v_s32 (int32_t x) | |
55 | { | |
56 | return (int32x4_t) V4 (x); | |
57 | } | |
58 | ||
59 | /* true if any elements of a vector compare result is non-zero. */ | |
60 | static inline int | |
61 | v_any_u32 (uint32x4_t x) | |
62 | { | |
63 | /* assume elements in x are either 0 or -1u. */ | |
64 | return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0; | |
65 | } | |
5a4b6f8e JR |
66 | static inline int |
67 | v_any_u32h (uint32x2_t x) | |
68 | { | |
69 | return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0; | |
70 | } | |
aed39a3a JR |
71 | static inline float32x4_t |
72 | v_lookup_f32 (const float *tab, uint32x4_t idx) | |
73 | { | |
74 | return (float32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] }; | |
75 | } | |
76 | static inline uint32x4_t | |
77 | v_lookup_u32 (const uint32_t *tab, uint32x4_t idx) | |
78 | { | |
79 | return (uint32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] }; | |
80 | } | |
81 | static inline float32x4_t | |
82 | v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p) | |
83 | { | |
84 | return (float32x4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1], | |
85 | p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] }; | |
86 | } | |
87 | static inline float32x4_t | |
88 | v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2, | |
89 | float32x4_t y, uint32x4_t p) | |
90 | { | |
91 | return (float32x4_t){ p[0] ? f (x1[0], x2[0]) : y[0], | |
92 | p[1] ? f (x1[1], x2[1]) : y[1], | |
93 | p[2] ? f (x1[2], x2[2]) : y[2], | |
94 | p[3] ? f (x1[3], x2[3]) : y[3] }; | |
95 | } | |
96 | ||
97 | static inline float64x2_t | |
98 | v_f64 (double x) | |
99 | { | |
100 | return (float64x2_t) V2 (x); | |
101 | } | |
102 | static inline uint64x2_t | |
103 | v_u64 (uint64_t x) | |
104 | { | |
105 | return (uint64x2_t) V2 (x); | |
106 | } | |
107 | static inline int64x2_t | |
108 | v_s64 (int64_t x) | |
109 | { | |
110 | return (int64x2_t) V2 (x); | |
111 | } | |
112 | ||
113 | /* true if any elements of a vector compare result is non-zero. */ | |
114 | static inline int | |
115 | v_any_u64 (uint64x2_t x) | |
116 | { | |
117 | /* assume elements in x are either 0 or -1u. */ | |
118 | return vpaddd_u64 (x) != 0; | |
119 | } | |
120 | /* true if all elements of a vector compare result is 1. */ | |
121 | static inline int | |
122 | v_all_u64 (uint64x2_t x) | |
123 | { | |
124 | /* assume elements in x are either 0 or -1u. */ | |
125 | return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2; | |
126 | } | |
127 | static inline float64x2_t | |
128 | v_lookup_f64 (const double *tab, uint64x2_t idx) | |
129 | { | |
130 | return (float64x2_t){ tab[idx[0]], tab[idx[1]] }; | |
131 | } | |
132 | static inline uint64x2_t | |
133 | v_lookup_u64 (const uint64_t *tab, uint64x2_t idx) | |
134 | { | |
135 | return (uint64x2_t){ tab[idx[0]], tab[idx[1]] }; | |
136 | } | |
137 | static inline float64x2_t | |
138 | v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p) | |
139 | { | |
140 | return (float64x2_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1] }; | |
141 | } | |
142 | static inline float64x2_t | |
143 | v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2, | |
144 | float64x2_t y, uint64x2_t p) | |
145 | { | |
146 | return (float64x2_t){ p[0] ? f (x1[0], x2[0]) : y[0], | |
147 | p[1] ? f (x1[1], x2[1]) : y[1] }; | |
148 | } | |
149 | ||
150 | #endif |