]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 LT |
2 | /*---------------------------------------------------------------------------+ |
3 | | poly_sin.c | | |
4 | | | | |
5 | | Computation of an approximation of the sin function and the cosine | | |
6 | | function by a polynomial. | | |
7 | | | | |
8 | | Copyright (C) 1992,1993,1994,1997,1999 | | |
9 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | |
10 | | E-mail billm@melbpc.org.au | | |
11 | | | | |
12 | | | | |
13 | +---------------------------------------------------------------------------*/ | |
14 | ||
1da177e4 LT |
15 | #include "exception.h" |
16 | #include "reg_constant.h" | |
17 | #include "fpu_emu.h" | |
18 | #include "fpu_system.h" | |
19 | #include "control_w.h" | |
20 | #include "poly.h" | |
21 | ||
1da177e4 LT |
22 | #define N_COEFF_P 4 |
23 | #define N_COEFF_N 4 | |
24 | ||
3d0d14f9 IM |
25 | static const unsigned long long pos_terms_l[N_COEFF_P] = { |
26 | 0xaaaaaaaaaaaaaaabLL, | |
27 | 0x00d00d00d00cf906LL, | |
28 | 0x000006b99159a8bbLL, | |
29 | 0x000000000d7392e6LL | |
1da177e4 LT |
30 | }; |
31 | ||
3d0d14f9 IM |
32 | static const unsigned long long neg_terms_l[N_COEFF_N] = { |
33 | 0x2222222222222167LL, | |
34 | 0x0002e3bc74aab624LL, | |
35 | 0x0000000b09229062LL, | |
36 | 0x00000000000c7973LL | |
1da177e4 LT |
37 | }; |
38 | ||
1da177e4 LT |
39 | #define N_COEFF_PH 4 |
40 | #define N_COEFF_NH 4 | |
3d0d14f9 IM |
41 | static const unsigned long long pos_terms_h[N_COEFF_PH] = { |
42 | 0x0000000000000000LL, | |
43 | 0x05b05b05b05b0406LL, | |
44 | 0x000049f93edd91a9LL, | |
45 | 0x00000000c9c9ed62LL | |
1da177e4 LT |
46 | }; |
47 | ||
3d0d14f9 IM |
48 | static const unsigned long long neg_terms_h[N_COEFF_NH] = { |
49 | 0xaaaaaaaaaaaaaa98LL, | |
50 | 0x001a01a01a019064LL, | |
51 | 0x0000008f76c68a77LL, | |
52 | 0x0000000000d58f5eLL | |
1da177e4 LT |
53 | }; |
54 | ||
1da177e4 LT |
55 | /*--- poly_sine() -----------------------------------------------------------+ |
56 | | | | |
57 | +---------------------------------------------------------------------------*/ | |
e8d591dc | 58 | void poly_sine(FPU_REG *st0_ptr) |
1da177e4 | 59 | { |
3d0d14f9 IM |
60 | int exponent, echange; |
61 | Xsig accumulator, argSqrd, argTo4; | |
62 | unsigned long fix_up, adj; | |
63 | unsigned long long fixed_arg; | |
64 | FPU_REG result; | |
1da177e4 | 65 | |
3d0d14f9 | 66 | exponent = exponent(st0_ptr); |
1da177e4 | 67 | |
3d0d14f9 | 68 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; |
1da177e4 | 69 | |
3d0d14f9 IM |
70 | /* Split into two ranges, for arguments below and above 1.0 */ |
71 | /* The boundary between upper and lower is approx 0.88309101259 */ | |
72 | if ((exponent < -1) | |
73 | || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) { | |
74 | /* The argument is <= 0.88309101259 */ | |
75 | ||
76 | argSqrd.msw = st0_ptr->sigh; | |
77 | argSqrd.midw = st0_ptr->sigl; | |
78 | argSqrd.lsw = 0; | |
79 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); | |
80 | shr_Xsig(&argSqrd, 2 * (-1 - exponent)); | |
81 | argTo4.msw = argSqrd.msw; | |
82 | argTo4.midw = argSqrd.midw; | |
83 | argTo4.lsw = argSqrd.lsw; | |
84 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
1da177e4 | 85 | |
3d0d14f9 IM |
86 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, |
87 | N_COEFF_N - 1); | |
88 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
89 | negate_Xsig(&accumulator); | |
1da177e4 | 90 | |
3d0d14f9 IM |
91 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, |
92 | N_COEFF_P - 1); | |
1da177e4 | 93 | |
3d0d14f9 IM |
94 | shr_Xsig(&accumulator, 2); /* Divide by four */ |
95 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | |
1da177e4 | 96 | |
3d0d14f9 IM |
97 | mul64_Xsig(&accumulator, &significand(st0_ptr)); |
98 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
99 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
1da177e4 | 100 | |
3d0d14f9 IM |
101 | /* Divide by four, FPU_REG compatible, etc */ |
102 | exponent = 3 * exponent; | |
1da177e4 | 103 | |
3d0d14f9 IM |
104 | /* The minimum exponent difference is 3 */ |
105 | shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); | |
1da177e4 | 106 | |
3d0d14f9 IM |
107 | negate_Xsig(&accumulator); |
108 | XSIG_LL(accumulator) += significand(st0_ptr); | |
1da177e4 | 109 | |
3d0d14f9 | 110 | echange = round_Xsig(&accumulator); |
1da177e4 | 111 | |
3d0d14f9 IM |
112 | setexponentpos(&result, exponent(st0_ptr) + echange); |
113 | } else { | |
114 | /* The argument is > 0.88309101259 */ | |
115 | /* We use sin(st(0)) = cos(pi/2-st(0)) */ | |
1da177e4 | 116 | |
3d0d14f9 | 117 | fixed_arg = significand(st0_ptr); |
1da177e4 | 118 | |
3d0d14f9 IM |
119 | if (exponent == 0) { |
120 | /* The argument is >= 1.0 */ | |
1da177e4 | 121 | |
3d0d14f9 IM |
122 | /* Put the binary point at the left. */ |
123 | fixed_arg <<= 1; | |
124 | } | |
125 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | |
126 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; | |
127 | /* There is a special case which arises due to rounding, to fix here. */ | |
128 | if (fixed_arg == 0xffffffffffffffffLL) | |
129 | fixed_arg = 0; | |
1da177e4 | 130 | |
3d0d14f9 IM |
131 | XSIG_LL(argSqrd) = fixed_arg; |
132 | argSqrd.lsw = 0; | |
133 | mul64_Xsig(&argSqrd, &fixed_arg); | |
1da177e4 | 134 | |
3d0d14f9 IM |
135 | XSIG_LL(argTo4) = XSIG_LL(argSqrd); |
136 | argTo4.lsw = argSqrd.lsw; | |
137 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
1da177e4 | 138 | |
3d0d14f9 IM |
139 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, |
140 | N_COEFF_NH - 1); | |
141 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
142 | negate_Xsig(&accumulator); | |
1da177e4 | 143 | |
3d0d14f9 IM |
144 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, |
145 | N_COEFF_PH - 1); | |
146 | negate_Xsig(&accumulator); | |
1da177e4 | 147 | |
3d0d14f9 IM |
148 | mul64_Xsig(&accumulator, &fixed_arg); |
149 | mul64_Xsig(&accumulator, &fixed_arg); | |
1da177e4 | 150 | |
3d0d14f9 IM |
151 | shr_Xsig(&accumulator, 3); |
152 | negate_Xsig(&accumulator); | |
1da177e4 | 153 | |
3d0d14f9 | 154 | add_Xsig_Xsig(&accumulator, &argSqrd); |
1da177e4 | 155 | |
3d0d14f9 | 156 | shr_Xsig(&accumulator, 1); |
1da177e4 | 157 | |
3d0d14f9 IM |
158 | accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ |
159 | negate_Xsig(&accumulator); | |
1da177e4 | 160 | |
3d0d14f9 IM |
161 | /* The basic computation is complete. Now fix the answer to |
162 | compensate for the error due to the approximation used for | |
163 | pi/2 | |
164 | */ | |
1da177e4 | 165 | |
3d0d14f9 IM |
166 | /* This has an exponent of -65 */ |
167 | fix_up = 0x898cc517; | |
168 | /* The fix-up needs to be improved for larger args */ | |
169 | if (argSqrd.msw & 0xffc00000) { | |
170 | /* Get about 32 bit precision in these: */ | |
171 | fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; | |
172 | } | |
173 | fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); | |
1da177e4 | 174 | |
3d0d14f9 IM |
175 | adj = accumulator.lsw; /* temp save */ |
176 | accumulator.lsw -= fix_up; | |
177 | if (accumulator.lsw > adj) | |
178 | XSIG_LL(accumulator)--; | |
1da177e4 | 179 | |
3d0d14f9 | 180 | echange = round_Xsig(&accumulator); |
1da177e4 | 181 | |
3d0d14f9 IM |
182 | setexponentpos(&result, echange - 1); |
183 | } | |
1da177e4 | 184 | |
3d0d14f9 IM |
185 | significand(&result) = XSIG_LL(accumulator); |
186 | setsign(&result, getsign(st0_ptr)); | |
187 | FPU_copy_to_reg0(&result, TAG_Valid); | |
1da177e4 LT |
188 | |
189 | #ifdef PARANOID | |
3d0d14f9 IM |
190 | if ((exponent(&result) >= 0) |
191 | && (significand(&result) > 0x8000000000000000LL)) { | |
192 | EXCEPTION(EX_INTERNAL | 0x150); | |
193 | } | |
1da177e4 LT |
194 | #endif /* PARANOID */ |
195 | ||
196 | } | |
197 | ||
1da177e4 LT |
198 | /*--- poly_cos() ------------------------------------------------------------+ |
199 | | | | |
200 | +---------------------------------------------------------------------------*/ | |
e8d591dc | 201 | void poly_cos(FPU_REG *st0_ptr) |
1da177e4 | 202 | { |
3d0d14f9 IM |
203 | FPU_REG result; |
204 | long int exponent, exp2, echange; | |
205 | Xsig accumulator, argSqrd, fix_up, argTo4; | |
206 | unsigned long long fixed_arg; | |
1da177e4 LT |
207 | |
208 | #ifdef PARANOID | |
3d0d14f9 IM |
209 | if ((exponent(st0_ptr) > 0) |
210 | || ((exponent(st0_ptr) == 0) | |
211 | && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) { | |
212 | EXCEPTION(EX_Invalid); | |
213 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); | |
214 | return; | |
1da177e4 | 215 | } |
3d0d14f9 | 216 | #endif /* PARANOID */ |
1da177e4 | 217 | |
3d0d14f9 IM |
218 | exponent = exponent(st0_ptr); |
219 | ||
220 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; | |
221 | ||
222 | if ((exponent < -1) | |
223 | || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) { | |
224 | /* arg is < 0.687705 */ | |
225 | ||
226 | argSqrd.msw = st0_ptr->sigh; | |
227 | argSqrd.midw = st0_ptr->sigl; | |
228 | argSqrd.lsw = 0; | |
229 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); | |
230 | ||
231 | if (exponent < -1) { | |
232 | /* shift the argument right by the required places */ | |
233 | shr_Xsig(&argSqrd, 2 * (-1 - exponent)); | |
234 | } | |
235 | ||
236 | argTo4.msw = argSqrd.msw; | |
237 | argTo4.midw = argSqrd.midw; | |
238 | argTo4.lsw = argSqrd.lsw; | |
239 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
240 | ||
241 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, | |
242 | N_COEFF_NH - 1); | |
243 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
244 | negate_Xsig(&accumulator); | |
245 | ||
246 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, | |
247 | N_COEFF_PH - 1); | |
248 | negate_Xsig(&accumulator); | |
249 | ||
250 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
251 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
252 | shr_Xsig(&accumulator, -2 * (1 + exponent)); | |
253 | ||
254 | shr_Xsig(&accumulator, 3); | |
255 | negate_Xsig(&accumulator); | |
256 | ||
257 | add_Xsig_Xsig(&accumulator, &argSqrd); | |
258 | ||
259 | shr_Xsig(&accumulator, 1); | |
260 | ||
261 | /* It doesn't matter if accumulator is all zero here, the | |
262 | following code will work ok */ | |
263 | negate_Xsig(&accumulator); | |
264 | ||
265 | if (accumulator.lsw & 0x80000000) | |
266 | XSIG_LL(accumulator)++; | |
267 | if (accumulator.msw == 0) { | |
268 | /* The result is 1.0 */ | |
269 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | |
270 | return; | |
271 | } else { | |
272 | significand(&result) = XSIG_LL(accumulator); | |
273 | ||
274 | /* will be a valid positive nr with expon = -1 */ | |
275 | setexponentpos(&result, -1); | |
276 | } | |
277 | } else { | |
278 | fixed_arg = significand(st0_ptr); | |
279 | ||
280 | if (exponent == 0) { | |
281 | /* The argument is >= 1.0 */ | |
282 | ||
283 | /* Put the binary point at the left. */ | |
284 | fixed_arg <<= 1; | |
285 | } | |
286 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | |
287 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; | |
288 | /* There is a special case which arises due to rounding, to fix here. */ | |
289 | if (fixed_arg == 0xffffffffffffffffLL) | |
290 | fixed_arg = 0; | |
291 | ||
292 | exponent = -1; | |
293 | exp2 = -1; | |
294 | ||
295 | /* A shift is needed here only for a narrow range of arguments, | |
296 | i.e. for fixed_arg approx 2^-32, but we pick up more... */ | |
297 | if (!(LL_MSW(fixed_arg) & 0xffff0000)) { | |
298 | fixed_arg <<= 16; | |
299 | exponent -= 16; | |
300 | exp2 -= 16; | |
301 | } | |
302 | ||
303 | XSIG_LL(argSqrd) = fixed_arg; | |
304 | argSqrd.lsw = 0; | |
305 | mul64_Xsig(&argSqrd, &fixed_arg); | |
306 | ||
307 | if (exponent < -1) { | |
308 | /* shift the argument right by the required places */ | |
309 | shr_Xsig(&argSqrd, 2 * (-1 - exponent)); | |
310 | } | |
311 | ||
312 | argTo4.msw = argSqrd.msw; | |
313 | argTo4.midw = argSqrd.midw; | |
314 | argTo4.lsw = argSqrd.lsw; | |
315 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
316 | ||
317 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, | |
318 | N_COEFF_N - 1); | |
319 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
320 | negate_Xsig(&accumulator); | |
321 | ||
322 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, | |
323 | N_COEFF_P - 1); | |
324 | ||
325 | shr_Xsig(&accumulator, 2); /* Divide by four */ | |
326 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | |
327 | ||
328 | mul64_Xsig(&accumulator, &fixed_arg); | |
329 | mul64_Xsig(&accumulator, &fixed_arg); | |
330 | mul64_Xsig(&accumulator, &fixed_arg); | |
331 | ||
332 | /* Divide by four, FPU_REG compatible, etc */ | |
333 | exponent = 3 * exponent; | |
334 | ||
335 | /* The minimum exponent difference is 3 */ | |
336 | shr_Xsig(&accumulator, exp2 - exponent); | |
337 | ||
338 | negate_Xsig(&accumulator); | |
339 | XSIG_LL(accumulator) += fixed_arg; | |
340 | ||
341 | /* The basic computation is complete. Now fix the answer to | |
342 | compensate for the error due to the approximation used for | |
343 | pi/2 | |
344 | */ | |
345 | ||
346 | /* This has an exponent of -65 */ | |
347 | XSIG_LL(fix_up) = 0x898cc51701b839a2ll; | |
348 | fix_up.lsw = 0; | |
349 | ||
350 | /* The fix-up needs to be improved for larger args */ | |
351 | if (argSqrd.msw & 0xffc00000) { | |
352 | /* Get about 32 bit precision in these: */ | |
353 | fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; | |
354 | fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; | |
355 | } | |
356 | ||
357 | exp2 += norm_Xsig(&accumulator); | |
358 | shr_Xsig(&accumulator, 1); /* Prevent overflow */ | |
359 | exp2++; | |
360 | shr_Xsig(&fix_up, 65 + exp2); | |
361 | ||
362 | add_Xsig_Xsig(&accumulator, &fix_up); | |
363 | ||
364 | echange = round_Xsig(&accumulator); | |
365 | ||
366 | setexponentpos(&result, exp2 + echange); | |
367 | significand(&result) = XSIG_LL(accumulator); | |
1da177e4 LT |
368 | } |
369 | ||
3d0d14f9 | 370 | FPU_copy_to_reg0(&result, TAG_Valid); |
1da177e4 LT |
371 | |
372 | #ifdef PARANOID | |
3d0d14f9 IM |
373 | if ((exponent(&result) >= 0) |
374 | && (significand(&result) > 0x8000000000000000LL)) { | |
375 | EXCEPTION(EX_INTERNAL | 0x151); | |
376 | } | |
1da177e4 LT |
377 | #endif /* PARANOID */ |
378 | ||
379 | } |