]>
git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/aarch64/fpu/poly_generic.h
/* Generic helpers for evaluating polynomials with various schemes.

   Copyright (C) 2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
22 # error Cannot use poly_generic without defining VTYPE
25 # error Cannot use poly_generic without defining VWRAP
28 # error Cannot use poly_generic without defining FMA
31 static inline VTYPE
VWRAP (pairwise_poly_3
) (VTYPE x
, VTYPE x2
,
34 /* At order 3, Estrin and Pairwise Horner are identical. */
35 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
36 VTYPE p23
= FMA (poly
[3], x
, poly
[2]);
37 return FMA (p23
, x2
, p01
);
40 static inline VTYPE
VWRAP (estrin_4
) (VTYPE x
, VTYPE x2
, VTYPE x4
,
43 VTYPE p03
= VWRAP (pairwise_poly_3
) (x
, x2
, poly
);
44 return FMA (poly
[4], x4
, p03
);
46 static inline VTYPE
VWRAP (estrin_5
) (VTYPE x
, VTYPE x2
, VTYPE x4
,
49 VTYPE p03
= VWRAP (pairwise_poly_3
) (x
, x2
, poly
);
50 VTYPE p45
= FMA (poly
[5], x
, poly
[4]);
51 return FMA (p45
, x4
, p03
);
53 static inline VTYPE
VWRAP (estrin_6
) (VTYPE x
, VTYPE x2
, VTYPE x4
,
56 VTYPE p03
= VWRAP (pairwise_poly_3
) (x
, x2
, poly
);
57 VTYPE p45
= FMA (poly
[5], x
, poly
[4]);
58 VTYPE p46
= FMA (poly
[6], x2
, p45
);
59 return FMA (p46
, x4
, p03
);
61 static inline VTYPE
VWRAP (estrin_7
) (VTYPE x
, VTYPE x2
, VTYPE x4
,
64 VTYPE p03
= VWRAP (pairwise_poly_3
) (x
, x2
, poly
);
65 VTYPE p47
= VWRAP (pairwise_poly_3
) (x
, x2
, poly
+ 4);
66 return FMA (p47
, x4
, p03
);
68 static inline VTYPE
VWRAP (estrin_8
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
71 return FMA (poly
[8], x8
, VWRAP (estrin_7
) (x
, x2
, x4
, poly
));
73 static inline VTYPE
VWRAP (estrin_9
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
76 VTYPE p89
= FMA (poly
[9], x
, poly
[8]);
77 return FMA (p89
, x8
, VWRAP (estrin_7
) (x
, x2
, x4
, poly
));
79 static inline VTYPE
VWRAP (estrin_10
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
82 VTYPE p89
= FMA (poly
[9], x
, poly
[8]);
83 VTYPE p8_10
= FMA (poly
[10], x2
, p89
);
84 return FMA (p8_10
, x8
, VWRAP (estrin_7
) (x
, x2
, x4
, poly
));
86 static inline VTYPE
VWRAP (estrin_11
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
89 VTYPE p8_11
= VWRAP (pairwise_poly_3
) (x
, x2
, poly
+ 8);
90 return FMA (p8_11
, x8
, VWRAP (estrin_7
) (x
, x2
, x4
, poly
));
92 static inline VTYPE
VWRAP (estrin_12
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
95 return FMA (VWRAP (estrin_4
) (x
, x2
, x4
, poly
+ 8), x8
,
96 VWRAP (estrin_7
) (x
, x2
, x4
, poly
));
98 static inline VTYPE
VWRAP (estrin_13
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
101 return FMA (VWRAP (estrin_5
) (x
, x2
, x4
, poly
+ 8), x8
,
102 VWRAP (estrin_7
) (x
, x2
, x4
, poly
));
104 static inline VTYPE
VWRAP (estrin_14
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
107 return FMA (VWRAP (estrin_6
) (x
, x2
, x4
, poly
+ 8), x8
,
108 VWRAP (estrin_7
) (x
, x2
, x4
, poly
));
110 static inline VTYPE
VWRAP (estrin_15
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
113 return FMA (VWRAP (estrin_7
) (x
, x2
, x4
, poly
+ 8), x8
,
114 VWRAP (estrin_7
) (x
, x2
, x4
, poly
));
116 static inline VTYPE
VWRAP (estrin_16
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
117 VTYPE x16
, const VTYPE
*poly
)
119 return FMA (poly
[16], x16
, VWRAP (estrin_15
) (x
, x2
, x4
, x8
, poly
));
121 static inline VTYPE
VWRAP (estrin_17
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
122 VTYPE x16
, const VTYPE
*poly
)
124 VTYPE p16_17
= FMA (poly
[17], x
, poly
[16]);
125 return FMA (p16_17
, x16
, VWRAP (estrin_15
) (x
, x2
, x4
, x8
, poly
));
127 static inline VTYPE
VWRAP (estrin_18
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
128 VTYPE x16
, const VTYPE
*poly
)
130 VTYPE p16_17
= FMA (poly
[17], x
, poly
[16]);
131 VTYPE p16_18
= FMA (poly
[18], x2
, p16_17
);
132 return FMA (p16_18
, x16
, VWRAP (estrin_15
) (x
, x2
, x4
, x8
, poly
));
134 static inline VTYPE
VWRAP (estrin_19
) (VTYPE x
, VTYPE x2
, VTYPE x4
, VTYPE x8
,
135 VTYPE x16
, const VTYPE
*poly
)
137 VTYPE p16_19
= VWRAP (pairwise_poly_3
) (x
, x2
, poly
+ 16);
138 return FMA (p16_19
, x16
, VWRAP (estrin_15
) (x
, x2
, x4
, x8
, poly
));
141 static inline VTYPE
VWRAP (horner_3
) (VTYPE x
, const VTYPE
*poly
)
143 VTYPE p
= FMA (poly
[3], x
, poly
[2]);
144 p
= FMA (x
, p
, poly
[1]);
145 p
= FMA (x
, p
, poly
[0]);
148 static inline VTYPE
VWRAP (horner_4
) (VTYPE x
, const VTYPE
*poly
)
150 VTYPE p
= FMA (poly
[4], x
, poly
[3]);
151 p
= FMA (x
, p
, poly
[2]);
152 p
= FMA (x
, p
, poly
[1]);
153 p
= FMA (x
, p
, poly
[0]);
156 static inline VTYPE
VWRAP (horner_5
) (VTYPE x
, const VTYPE
*poly
)
158 return FMA (x
, VWRAP (horner_4
) (x
, poly
+ 1), poly
[0]);
160 static inline VTYPE
VWRAP (horner_6
) (VTYPE x
, const VTYPE
*poly
)
162 return FMA (x
, VWRAP (horner_5
) (x
, poly
+ 1), poly
[0]);
164 static inline VTYPE
VWRAP (horner_7
) (VTYPE x
, const VTYPE
*poly
)
166 return FMA (x
, VWRAP (horner_6
) (x
, poly
+ 1), poly
[0]);
168 static inline VTYPE
VWRAP (horner_8
) (VTYPE x
, const VTYPE
*poly
)
170 return FMA (x
, VWRAP (horner_7
) (x
, poly
+ 1), poly
[0]);
172 static inline VTYPE
VWRAP (horner_9
) (VTYPE x
, const VTYPE
*poly
)
174 return FMA (x
, VWRAP (horner_8
) (x
, poly
+ 1), poly
[0]);
176 static inline VTYPE
VWRAP (horner_10
) (VTYPE x
, const VTYPE
*poly
)
178 return FMA (x
, VWRAP (horner_9
) (x
, poly
+ 1), poly
[0]);
180 static inline VTYPE
VWRAP (horner_11
) (VTYPE x
, const VTYPE
*poly
)
182 return FMA (x
, VWRAP (horner_10
) (x
, poly
+ 1), poly
[0]);
184 static inline VTYPE
VWRAP (horner_12
) (VTYPE x
, const VTYPE
*poly
)
186 return FMA (x
, VWRAP (horner_11
) (x
, poly
+ 1), poly
[0]);
189 static inline VTYPE
VWRAP (pw_horner_4
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
191 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
192 VTYPE p23
= FMA (poly
[3], x
, poly
[2]);
194 p
= FMA (x2
, poly
[4], p23
);
195 p
= FMA (x2
, p
, p01
);
198 static inline VTYPE
VWRAP (pw_horner_5
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
200 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
201 VTYPE p23
= FMA (poly
[3], x
, poly
[2]);
202 VTYPE p45
= FMA (poly
[5], x
, poly
[4]);
204 p
= FMA (x2
, p45
, p23
);
205 p
= FMA (x2
, p
, p01
);
208 static inline VTYPE
VWRAP (pw_horner_6
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
210 VTYPE p26
= VWRAP (pw_horner_4
) (x
, x2
, poly
+ 2);
211 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
212 return FMA (x2
, p26
, p01
);
214 static inline VTYPE
VWRAP (pw_horner_7
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
216 VTYPE p27
= VWRAP (pw_horner_5
) (x
, x2
, poly
+ 2);
217 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
218 return FMA (x2
, p27
, p01
);
220 static inline VTYPE
VWRAP (pw_horner_8
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
222 VTYPE p28
= VWRAP (pw_horner_6
) (x
, x2
, poly
+ 2);
223 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
224 return FMA (x2
, p28
, p01
);
226 static inline VTYPE
VWRAP (pw_horner_9
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
228 VTYPE p29
= VWRAP (pw_horner_7
) (x
, x2
, poly
+ 2);
229 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
230 return FMA (x2
, p29
, p01
);
232 static inline VTYPE
VWRAP (pw_horner_10
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
234 VTYPE p2_10
= VWRAP (pw_horner_8
) (x
, x2
, poly
+ 2);
235 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
236 return FMA (x2
, p2_10
, p01
);
238 static inline VTYPE
VWRAP (pw_horner_11
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
240 VTYPE p2_11
= VWRAP (pw_horner_9
) (x
, x2
, poly
+ 2);
241 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
242 return FMA (x2
, p2_11
, p01
);
244 static inline VTYPE
VWRAP (pw_horner_12
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
246 VTYPE p2_12
= VWRAP (pw_horner_10
) (x
, x2
, poly
+ 2);
247 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
248 return FMA (x2
, p2_12
, p01
);
250 static inline VTYPE
VWRAP (pw_horner_13
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
252 VTYPE p2_13
= VWRAP (pw_horner_11
) (x
, x2
, poly
+ 2);
253 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
254 return FMA (x2
, p2_13
, p01
);
256 static inline VTYPE
VWRAP (pw_horner_14
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
258 VTYPE p2_14
= VWRAP (pw_horner_12
) (x
, x2
, poly
+ 2);
259 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
260 return FMA (x2
, p2_14
, p01
);
262 static inline VTYPE
VWRAP (pw_horner_15
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
264 VTYPE p2_15
= VWRAP (pw_horner_13
) (x
, x2
, poly
+ 2);
265 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
266 return FMA (x2
, p2_15
, p01
);
268 static inline VTYPE
VWRAP (pw_horner_16
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
270 VTYPE p2_16
= VWRAP (pw_horner_14
) (x
, x2
, poly
+ 2);
271 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
272 return FMA (x2
, p2_16
, p01
);
274 static inline VTYPE
VWRAP (pw_horner_17
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
276 VTYPE p2_17
= VWRAP (pw_horner_15
) (x
, x2
, poly
+ 2);
277 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
278 return FMA (x2
, p2_17
, p01
);
280 static inline VTYPE
VWRAP (pw_horner_18
) (VTYPE x
, VTYPE x2
, const VTYPE
*poly
)
282 VTYPE p2_18
= VWRAP (pw_horner_16
) (x
, x2
, poly
+ 2);
283 VTYPE p01
= FMA (poly
[1], x
, poly
[0]);
284 return FMA (x2
, p2_18
, p01
);