]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/aarch64/fpu/poly_generic.h
aarch64: Add vector implementations of tan routines
[thirdparty/glibc.git] / sysdeps / aarch64 / fpu / poly_generic.h
1 /* Generic helpers for evaluating polynomials with various schemes.
2
3 Copyright (C) 2023 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20
21 #ifndef VTYPE
22 # error Cannot use poly_generic without defining VTYPE
23 #endif
24 #ifndef VWRAP
25 # error Cannot use poly_generic without defining VWRAP
26 #endif
27 #ifndef FMA
28 # error Cannot use poly_generic without defining FMA
29 #endif
30
31 static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2,
32 const VTYPE *poly)
33 {
34 /* At order 3, Estrin and Pairwise Horner are identical. */
35 VTYPE p01 = FMA (poly[1], x, poly[0]);
36 VTYPE p23 = FMA (poly[3], x, poly[2]);
37 return FMA (p23, x2, p01);
38 }
39
40 static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4,
41 const VTYPE *poly)
42 {
43 VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
44 return FMA (poly[4], x4, p03);
45 }
46 static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4,
47 const VTYPE *poly)
48 {
49 VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
50 VTYPE p45 = FMA (poly[5], x, poly[4]);
51 return FMA (p45, x4, p03);
52 }
53 static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4,
54 const VTYPE *poly)
55 {
56 VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
57 VTYPE p45 = FMA (poly[5], x, poly[4]);
58 VTYPE p46 = FMA (poly[6], x2, p45);
59 return FMA (p46, x4, p03);
60 }
61 static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4,
62 const VTYPE *poly)
63 {
64 VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
65 VTYPE p47 = VWRAP (pairwise_poly_3) (x, x2, poly + 4);
66 return FMA (p47, x4, p03);
67 }
68 static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
69 const VTYPE *poly)
70 {
71 return FMA (poly[8], x8, VWRAP (estrin_7) (x, x2, x4, poly));
72 }
73 static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
74 const VTYPE *poly)
75 {
76 VTYPE p89 = FMA (poly[9], x, poly[8]);
77 return FMA (p89, x8, VWRAP (estrin_7) (x, x2, x4, poly));
78 }
79 static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
80 const VTYPE *poly)
81 {
82 VTYPE p89 = FMA (poly[9], x, poly[8]);
83 VTYPE p8_10 = FMA (poly[10], x2, p89);
84 return FMA (p8_10, x8, VWRAP (estrin_7) (x, x2, x4, poly));
85 }
86 static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
87 const VTYPE *poly)
88 {
89 VTYPE p8_11 = VWRAP (pairwise_poly_3) (x, x2, poly + 8);
90 return FMA (p8_11, x8, VWRAP (estrin_7) (x, x2, x4, poly));
91 }
92 static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
93 const VTYPE *poly)
94 {
95 return FMA (VWRAP (estrin_4) (x, x2, x4, poly + 8), x8,
96 VWRAP (estrin_7) (x, x2, x4, poly));
97 }
98 static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
99 const VTYPE *poly)
100 {
101 return FMA (VWRAP (estrin_5) (x, x2, x4, poly + 8), x8,
102 VWRAP (estrin_7) (x, x2, x4, poly));
103 }
104 static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
105 const VTYPE *poly)
106 {
107 return FMA (VWRAP (estrin_6) (x, x2, x4, poly + 8), x8,
108 VWRAP (estrin_7) (x, x2, x4, poly));
109 }
110 static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
111 const VTYPE *poly)
112 {
113 return FMA (VWRAP (estrin_7) (x, x2, x4, poly + 8), x8,
114 VWRAP (estrin_7) (x, x2, x4, poly));
115 }
116 static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
117 VTYPE x16, const VTYPE *poly)
118 {
119 return FMA (poly[16], x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
120 }
121 static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
122 VTYPE x16, const VTYPE *poly)
123 {
124 VTYPE p16_17 = FMA (poly[17], x, poly[16]);
125 return FMA (p16_17, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
126 }
127 static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
128 VTYPE x16, const VTYPE *poly)
129 {
130 VTYPE p16_17 = FMA (poly[17], x, poly[16]);
131 VTYPE p16_18 = FMA (poly[18], x2, p16_17);
132 return FMA (p16_18, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
133 }
134 static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
135 VTYPE x16, const VTYPE *poly)
136 {
137 VTYPE p16_19 = VWRAP (pairwise_poly_3) (x, x2, poly + 16);
138 return FMA (p16_19, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
139 }
140
141 static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly)
142 {
143 VTYPE p = FMA (poly[3], x, poly[2]);
144 p = FMA (x, p, poly[1]);
145 p = FMA (x, p, poly[0]);
146 return p;
147 }
148 static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly)
149 {
150 VTYPE p = FMA (poly[4], x, poly[3]);
151 p = FMA (x, p, poly[2]);
152 p = FMA (x, p, poly[1]);
153 p = FMA (x, p, poly[0]);
154 return p;
155 }
156 static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly)
157 {
158 return FMA (x, VWRAP (horner_4) (x, poly + 1), poly[0]);
159 }
160 static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly)
161 {
162 return FMA (x, VWRAP (horner_5) (x, poly + 1), poly[0]);
163 }
164 static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly)
165 {
166 return FMA (x, VWRAP (horner_6) (x, poly + 1), poly[0]);
167 }
168 static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly)
169 {
170 return FMA (x, VWRAP (horner_7) (x, poly + 1), poly[0]);
171 }
172 static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly)
173 {
174 return FMA (x, VWRAP (horner_8) (x, poly + 1), poly[0]);
175 }
176 static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly)
177 {
178 return FMA (x, VWRAP (horner_9) (x, poly + 1), poly[0]);
179 }
180 static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly)
181 {
182 return FMA (x, VWRAP (horner_10) (x, poly + 1), poly[0]);
183 }
184 static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly)
185 {
186 return FMA (x, VWRAP (horner_11) (x, poly + 1), poly[0]);
187 }
188
189 static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly)
190 {
191 VTYPE p01 = FMA (poly[1], x, poly[0]);
192 VTYPE p23 = FMA (poly[3], x, poly[2]);
193 VTYPE p;
194 p = FMA (x2, poly[4], p23);
195 p = FMA (x2, p, p01);
196 return p;
197 }
198 static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly)
199 {
200 VTYPE p01 = FMA (poly[1], x, poly[0]);
201 VTYPE p23 = FMA (poly[3], x, poly[2]);
202 VTYPE p45 = FMA (poly[5], x, poly[4]);
203 VTYPE p;
204 p = FMA (x2, p45, p23);
205 p = FMA (x2, p, p01);
206 return p;
207 }
208 static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly)
209 {
210 VTYPE p26 = VWRAP (pw_horner_4) (x, x2, poly + 2);
211 VTYPE p01 = FMA (poly[1], x, poly[0]);
212 return FMA (x2, p26, p01);
213 }
214 static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly)
215 {
216 VTYPE p27 = VWRAP (pw_horner_5) (x, x2, poly + 2);
217 VTYPE p01 = FMA (poly[1], x, poly[0]);
218 return FMA (x2, p27, p01);
219 }
220 static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly)
221 {
222 VTYPE p28 = VWRAP (pw_horner_6) (x, x2, poly + 2);
223 VTYPE p01 = FMA (poly[1], x, poly[0]);
224 return FMA (x2, p28, p01);
225 }
226 static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly)
227 {
228 VTYPE p29 = VWRAP (pw_horner_7) (x, x2, poly + 2);
229 VTYPE p01 = FMA (poly[1], x, poly[0]);
230 return FMA (x2, p29, p01);
231 }
232 static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly)
233 {
234 VTYPE p2_10 = VWRAP (pw_horner_8) (x, x2, poly + 2);
235 VTYPE p01 = FMA (poly[1], x, poly[0]);
236 return FMA (x2, p2_10, p01);
237 }
238 static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly)
239 {
240 VTYPE p2_11 = VWRAP (pw_horner_9) (x, x2, poly + 2);
241 VTYPE p01 = FMA (poly[1], x, poly[0]);
242 return FMA (x2, p2_11, p01);
243 }
244 static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly)
245 {
246 VTYPE p2_12 = VWRAP (pw_horner_10) (x, x2, poly + 2);
247 VTYPE p01 = FMA (poly[1], x, poly[0]);
248 return FMA (x2, p2_12, p01);
249 }
250 static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly)
251 {
252 VTYPE p2_13 = VWRAP (pw_horner_11) (x, x2, poly + 2);
253 VTYPE p01 = FMA (poly[1], x, poly[0]);
254 return FMA (x2, p2_13, p01);
255 }
256 static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly)
257 {
258 VTYPE p2_14 = VWRAP (pw_horner_12) (x, x2, poly + 2);
259 VTYPE p01 = FMA (poly[1], x, poly[0]);
260 return FMA (x2, p2_14, p01);
261 }
262 static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly)
263 {
264 VTYPE p2_15 = VWRAP (pw_horner_13) (x, x2, poly + 2);
265 VTYPE p01 = FMA (poly[1], x, poly[0]);
266 return FMA (x2, p2_15, p01);
267 }
268 static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly)
269 {
270 VTYPE p2_16 = VWRAP (pw_horner_14) (x, x2, poly + 2);
271 VTYPE p01 = FMA (poly[1], x, poly[0]);
272 return FMA (x2, p2_16, p01);
273 }
274 static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly)
275 {
276 VTYPE p2_17 = VWRAP (pw_horner_15) (x, x2, poly + 2);
277 VTYPE p01 = FMA (poly[1], x, poly[0]);
278 return FMA (x2, p2_17, p01);
279 }
280 static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly)
281 {
282 VTYPE p2_18 = VWRAP (pw_horner_16) (x, x2, poly + 2);
283 VTYPE p01 = FMA (poly[1], x, poly[0]);
284 return FMA (x2, p2_18, p01);
285 }