]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/aarch64/fpu/v_math.h
aarch64: Add vector implementations of log1p routines
[thirdparty/glibc.git] / sysdeps / aarch64 / fpu / v_math.h
CommitLineData
aed39a3a
JR
1/* Utilities for Advanced SIMD libmvec routines.
2 Copyright (C) 2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#ifndef _V_MATH_H
20#define _V_MATH_H
21
22#include <arm_neon.h>
23#include "vecmath_config.h"
24
/* Function attribute selecting the AArch64 vector procedure call standard
   (expands to the compiler's aarch64_vector_pcs attribute).  */
25#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
26
/* Build the exported symbol name of a vector routine by token pasting FUN
   onto a fixed prefix.  The F variants append an 'f' suffix to FUN and use
   the "N4" prefix; the D variants use "N2" and no suffix.  The 1/2 variants
   differ in the number of 'v' characters in the prefix ("v" vs "vv").
   NOTE(review): the prefixes follow the AArch64 vector function ABI name
   mangling (lane count, one 'v' per vector argument) -- confirm against the
   ABI document if changing them.  */
27#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
28#define V_NAME_D1(fun) _ZGVnN2v_##fun
29#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
30#define V_NAME_D2(fun) _ZGVnN2vv_##fun
31
32/* Shorthand helpers for declaring constants. */
5a4b6f8e
JR
/* Brace initializers that repeat X in 2, 4 or 8 positions.  X is expanded
   textually once per position, so an argument with side effects would be
   evaluated once for every occurrence.  */
33#define V2(X) { X, X }
34#define V4(X) { X, X, X, X }
35#define V8(X) { X, X, X, X, X, X, X, X }
aed39a3a 36
5a4b6f8e
JR
37static inline int
38v_any_u16h (uint16x4_t x)
39{
40 return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0;
41}
aed39a3a
JR
42
43static inline float32x4_t
44v_f32 (float x)
45{
46 return (float32x4_t) V4 (x);
47}
48static inline uint32x4_t
49v_u32 (uint32_t x)
50{
51 return (uint32x4_t) V4 (x);
52}
53static inline int32x4_t
54v_s32 (int32_t x)
55{
56 return (int32x4_t) V4 (x);
57}
58
59/* true if any elements of a vector compare result is non-zero. */
60static inline int
61v_any_u32 (uint32x4_t x)
62{
63 /* assume elements in x are either 0 or -1u. */
64 return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
65}
5a4b6f8e
JR
66static inline int
67v_any_u32h (uint32x2_t x)
68{
69 return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0;
70}
aed39a3a
JR
71static inline float32x4_t
72v_lookup_f32 (const float *tab, uint32x4_t idx)
73{
74 return (float32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
75}
76static inline uint32x4_t
77v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
78{
79 return (uint32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
80}
81static inline float32x4_t
82v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
83{
84 return (float32x4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
85 p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] };
86}
87static inline float32x4_t
88v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
89 float32x4_t y, uint32x4_t p)
90{
91 return (float32x4_t){ p[0] ? f (x1[0], x2[0]) : y[0],
92 p[1] ? f (x1[1], x2[1]) : y[1],
93 p[2] ? f (x1[2], x2[2]) : y[2],
94 p[3] ? f (x1[3], x2[3]) : y[3] };
95}
96
97static inline float64x2_t
98v_f64 (double x)
99{
100 return (float64x2_t) V2 (x);
101}
102static inline uint64x2_t
103v_u64 (uint64_t x)
104{
105 return (uint64x2_t) V2 (x);
106}
107static inline int64x2_t
108v_s64 (int64_t x)
109{
110 return (int64x2_t) V2 (x);
111}
112
113/* true if any elements of a vector compare result is non-zero. */
114static inline int
115v_any_u64 (uint64x2_t x)
116{
117 /* assume elements in x are either 0 or -1u. */
118 return vpaddd_u64 (x) != 0;
119}
120/* true if all elements of a vector compare result is 1. */
121static inline int
122v_all_u64 (uint64x2_t x)
123{
124 /* assume elements in x are either 0 or -1u. */
125 return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2;
126}
127static inline float64x2_t
128v_lookup_f64 (const double *tab, uint64x2_t idx)
129{
130 return (float64x2_t){ tab[idx[0]], tab[idx[1]] };
131}
132static inline uint64x2_t
133v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
134{
135 return (uint64x2_t){ tab[idx[0]], tab[idx[1]] };
136}
137static inline float64x2_t
138v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
139{
140 return (float64x2_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1] };
141}
142static inline float64x2_t
143v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,
144 float64x2_t y, uint64x2_t p)
145{
146 return (float64x2_t){ p[0] ? f (x1[0], x2[0]) : y[0],
147 p[1] ? f (x1[1], x2[1]) : y[1] };
148}
149
150#endif