]> git.ipfire.org Git - thirdparty/glibc.git/blob - soft-fp/op-2.h
(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[thirdparty/glibc.git] / soft-fp / op-2.h
1 /* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
3 Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
14
15 The GNU C Library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public
21 License along with the GNU C Library; if not, write to the Free
22 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23 02111-1307 USA. */
24
25 #define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
26 #define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
27 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
28 #define _FP_FRAC_HIGH_2(X) (X##_f1)
29 #define _FP_FRAC_LOW_2(X) (X##_f0)
30 #define _FP_FRAC_WORD_2(X,w) (X##_f##w)
31
32 #define _FP_FRAC_SLL_2(X,N) \
33 do { \
34 if ((N) < _FP_W_TYPE_SIZE) \
35 { \
36 if (__builtin_constant_p(N) && (N) == 1) \
37 { \
38 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
39 X##_f0 += X##_f0; \
40 } \
41 else \
42 { \
43 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
44 X##_f0 <<= (N); \
45 } \
46 } \
47 else \
48 { \
49 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
50 X##_f0 = 0; \
51 } \
52 } while (0)
53
54 #define _FP_FRAC_SRL_2(X,N) \
55 do { \
56 if ((N) < _FP_W_TYPE_SIZE) \
57 { \
58 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
59 X##_f1 >>= (N); \
60 } \
61 else \
62 { \
63 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
64 X##_f1 = 0; \
65 } \
66 } while (0)
67
68 /* Right shift with sticky-lsb. */
69 #define _FP_FRAC_SRS_2(X,N,sz) \
70 do { \
71 if ((N) < _FP_W_TYPE_SIZE) \
72 { \
73 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
74 (__builtin_constant_p(N) && (N) == 1 \
75 ? X##_f0 & 1 \
76 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
77 X##_f1 >>= (N); \
78 } \
79 else \
80 { \
81 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
82 (((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | \
83 X##_f0) != 0)); \
84 X##_f1 = 0; \
85 } \
86 } while (0)
87
88 #define _FP_FRAC_ADDI_2(X,I) \
89 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
90
91 #define _FP_FRAC_ADD_2(R,X,Y) \
92 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
93
94 #define _FP_FRAC_SUB_2(R,X,Y) \
95 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
96
97 #define _FP_FRAC_DEC_2(X,Y) \
98 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
99
100 #define _FP_FRAC_CLZ_2(R,X) \
101 do { \
102 if (X##_f1) \
103 __FP_CLZ(R,X##_f1); \
104 else \
105 { \
106 __FP_CLZ(R,X##_f0); \
107 R += _FP_W_TYPE_SIZE; \
108 } \
109 } while(0)
110
111 /* Predicates */
112 #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
113 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
114 #define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
115 #define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
116 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
117 #define _FP_FRAC_GT_2(X, Y) \
118 (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 > Y##_f0)
119 #define _FP_FRAC_GE_2(X, Y) \
120 (X##_f1 > Y##_f1 || X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)
121
122 #define _FP_ZEROFRAC_2 0, 0
123 #define _FP_MINFRAC_2 0, 1
124 #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
125
126 /*
127 * Internals
128 */
129
130 #define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
131
132 #define __FP_CLZ_2(R, xh, xl) \
133 do { \
134 if (xh) \
135 __FP_CLZ(R,xh); \
136 else \
137 { \
138 __FP_CLZ(R,xl); \
139 R += _FP_W_TYPE_SIZE; \
140 } \
141 } while(0)
142
143 #if 0
144
145 #ifndef __FP_FRAC_ADDI_2
146 #define __FP_FRAC_ADDI_2(xh, xl, i) \
147 (xh += ((xl += i) < i))
148 #endif
149 #ifndef __FP_FRAC_ADD_2
150 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
151 (rh = xh + yh + ((rl = xl + yl) < xl))
152 #endif
153 #ifndef __FP_FRAC_SUB_2
154 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
155 (rh = xh - yh - ((rl = xl - yl) > xl))
156 #endif
157 #ifndef __FP_FRAC_DEC_2
158 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
159 do { \
160 UWtype _t = xl; \
161 xh -= yh + ((xl -= yl) > _t); \
162 } while (0)
163 #endif
164
165 #else
166
167 #undef __FP_FRAC_ADDI_2
168 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
169 #undef __FP_FRAC_ADD_2
170 #define __FP_FRAC_ADD_2 add_ssaaaa
171 #undef __FP_FRAC_SUB_2
172 #define __FP_FRAC_SUB_2 sub_ddmmss
173 #undef __FP_FRAC_DEC_2
174 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
175
176 #endif
177
178 /*
179 * Unpack the raw bits of a native fp value. Do not classify or
180 * normalize the data.
181 */
182
183 #define _FP_UNPACK_RAW_2(fs, X, val) \
184 do { \
185 union _FP_UNION_##fs _flo; _flo.flt = (val); \
186 \
187 X##_f0 = _flo.bits.frac0; \
188 X##_f1 = _flo.bits.frac1; \
189 X##_e = _flo.bits.exp; \
190 X##_s = _flo.bits.sign; \
191 } while (0)
192
193 #define _FP_UNPACK_RAW_2_P(fs, X, val) \
194 do { \
195 union _FP_UNION_##fs *_flo = \
196 (union _FP_UNION_##fs *)(val); \
197 \
198 X##_f0 = _flo->bits.frac0; \
199 X##_f1 = _flo->bits.frac1; \
200 X##_e = _flo->bits.exp; \
201 X##_s = _flo->bits.sign; \
202 } while (0)
203
204
205 /*
206 * Repack the raw bits of a native fp value.
207 */
208
209 #define _FP_PACK_RAW_2(fs, val, X) \
210 do { \
211 union _FP_UNION_##fs _flo; \
212 \
213 _flo.bits.frac0 = X##_f0; \
214 _flo.bits.frac1 = X##_f1; \
215 _flo.bits.exp = X##_e; \
216 _flo.bits.sign = X##_s; \
217 \
218 (val) = _flo.flt; \
219 } while (0)
220
221 #define _FP_PACK_RAW_2_P(fs, val, X) \
222 do { \
223 union _FP_UNION_##fs *_flo = \
224 (union _FP_UNION_##fs *)(val); \
225 \
226 _flo->bits.frac0 = X##_f0; \
227 _flo->bits.frac1 = X##_f1; \
228 _flo->bits.exp = X##_e; \
229 _flo->bits.sign = X##_s; \
230 } while (0)
231
232
233 /*
234 * Multiplication algorithms:
235 */
236
237 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
238
239 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
240 do { \
241 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
242 \
243 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
244 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
245 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
246 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
247 \
248 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
249 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
250 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
251 _FP_FRAC_WORD_4(_z,1)); \
252 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
253 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
254 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
255 _FP_FRAC_WORD_4(_z,1)); \
256 \
257 /* Normalize since we know where the msb of the multiplicands \
258 were (bit B), we know that the msb of the of the product is \
259 at either 2B or 2B-1. */ \
260 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
261 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
262 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
263 } while (0)
264
265 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
266 Do only 3 multiplications instead of four. This one is for machines
267 where multiplication is much more expensive than subtraction. */
268
269 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
270 do { \
271 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
272 _FP_W_TYPE _d; \
273 int _c1, _c2; \
274 \
275 _b_f0 = X##_f0 + X##_f1; \
276 _c1 = _b_f0 < X##_f0; \
277 _b_f1 = Y##_f0 + Y##_f1; \
278 _c2 = _b_f1 < Y##_f0; \
279 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
280 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
281 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
282 \
283 _b_f0 &= -_c2; \
284 _b_f1 &= -_c1; \
285 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
286 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
287 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
288 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
289 _b_f0); \
290 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
291 _b_f1); \
292 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
293 _FP_FRAC_WORD_4(_z,1), \
294 0, _d, _FP_FRAC_WORD_4(_z,0)); \
295 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
296 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
297 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
298 _c_f1, _c_f0, \
299 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
300 \
301 /* Normalize since we know where the msb of the multiplicands \
302 were (bit B), we know that the msb of the of the product is \
303 at either 2B or 2B-1. */ \
304 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
305 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
306 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
307 } while (0)
308
309 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
310 do { \
311 _FP_FRAC_DECL_4(_z); \
312 _FP_W_TYPE _x[2], _y[2]; \
313 _x[0] = X##_f0; _x[1] = X##_f1; \
314 _y[0] = Y##_f0; _y[1] = Y##_f1; \
315 \
316 mpn_mul_n(_z_f, _x, _y, 2); \
317 \
318 /* Normalize since we know where the msb of the multiplicands \
319 were (bit B), we know that the msb of the of the product is \
320 at either 2B or 2B-1. */ \
321 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
322 R##_f0 = _z_f[0]; \
323 R##_f1 = _z_f[1]; \
324 } while (0)
325
326 /* Do at most 120x120=240 bits multiplication using double floating
327 point multiplication. This is useful if floating point
328 multiplication has much bigger throughput than integer multiply.
329 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
330 between 106 and 120 only.
331 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
332 SETFETZ is a macro which will disable all FPU exceptions and set rounding
333 towards zero, RESETFE should optionally reset it back. */
334
335 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
336 do { \
337 static const double _const[] = { \
338 /* 2^-24 */ 5.9604644775390625e-08, \
339 /* 2^-48 */ 3.5527136788005009e-15, \
340 /* 2^-72 */ 2.1175823681357508e-22, \
341 /* 2^-96 */ 1.2621774483536189e-29, \
342 /* 2^28 */ 2.68435456e+08, \
343 /* 2^4 */ 1.600000e+01, \
344 /* 2^-20 */ 9.5367431640625e-07, \
345 /* 2^-44 */ 5.6843418860808015e-14, \
346 /* 2^-68 */ 3.3881317890172014e-21, \
347 /* 2^-92 */ 2.0194839173657902e-28, \
348 /* 2^-116 */ 1.2037062152420224e-35}; \
349 double _a240, _b240, _c240, _d240, _e240, _f240, \
350 _g240, _h240, _i240, _j240, _k240; \
351 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
352 _p240, _q240, _r240, _s240; \
353 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
354 \
355 if (wfracbits < 106 || wfracbits > 120) \
356 abort(); \
357 \
358 setfetz; \
359 \
360 _e240 = (double)(long)(X##_f0 & 0xffffff); \
361 _j240 = (double)(long)(Y##_f0 & 0xffffff); \
362 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
363 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
364 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
365 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
366 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
367 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
368 _a240 = (double)(long)(X##_f1 >> 32); \
369 _f240 = (double)(long)(Y##_f1 >> 32); \
370 _e240 *= _const[3]; \
371 _j240 *= _const[3]; \
372 _d240 *= _const[2]; \
373 _i240 *= _const[2]; \
374 _c240 *= _const[1]; \
375 _h240 *= _const[1]; \
376 _b240 *= _const[0]; \
377 _g240 *= _const[0]; \
378 _s240.d = _e240*_j240;\
379 _r240.d = _d240*_j240 + _e240*_i240;\
380 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
381 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
382 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
383 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
384 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
385 _l240.d = _a240*_g240 + _b240*_f240; \
386 _k240 = _a240*_f240; \
387 _r240.d += _s240.d; \
388 _q240.d += _r240.d; \
389 _p240.d += _q240.d; \
390 _o240.d += _p240.d; \
391 _n240.d += _o240.d; \
392 _m240.d += _n240.d; \
393 _l240.d += _m240.d; \
394 _k240 += _l240.d; \
395 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
396 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
397 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
398 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
399 _o240.d += _const[7]; \
400 _n240.d += _const[6]; \
401 _m240.d += _const[5]; \
402 _l240.d += _const[4]; \
403 if (_s240.d != 0.0) _y240 = 1; \
404 if (_r240.d != 0.0) _y240 = 1; \
405 if (_q240.d != 0.0) _y240 = 1; \
406 if (_p240.d != 0.0) _y240 = 1; \
407 _t240 = (DItype)_k240; \
408 _u240 = _l240.i; \
409 _v240 = _m240.i; \
410 _w240 = _n240.i; \
411 _x240 = _o240.i; \
412 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
413 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
414 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
415 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
416 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
417 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
418 | _y240; \
419 resetfe; \
420 } while (0)
421
422 /*
423 * Division algorithms:
424 */
425
426 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
427 do { \
428 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
429 if (_FP_FRAC_GT_2(X, Y)) \
430 { \
431 _n_f2 = X##_f1 >> 1; \
432 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
433 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
434 } \
435 else \
436 { \
437 R##_e--; \
438 _n_f2 = X##_f1; \
439 _n_f1 = X##_f0; \
440 _n_f0 = 0; \
441 } \
442 \
443 /* Normalize, i.e. make the most significant bit of the \
444 denominator set. */ \
445 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
446 \
447 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
448 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
449 _r_f0 = _n_f0; \
450 if (_FP_FRAC_GT_2(_m, _r)) \
451 { \
452 R##_f1--; \
453 _FP_FRAC_ADD_2(_r, Y, _r); \
454 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
455 { \
456 R##_f1--; \
457 _FP_FRAC_ADD_2(_r, Y, _r); \
458 } \
459 } \
460 _FP_FRAC_DEC_2(_r, _m); \
461 \
462 if (_r_f1 == Y##_f1) \
463 { \
464 /* This is a special case, not an optimization \
465 (_r/Y##_f1 would not fit into UWtype). \
466 As _r is guaranteed to be < Y, R##_f0 can be either \
467 (UWtype)-1 or (UWtype)-2. But as we know what kind \
468 of bits it is (sticky, guard, round), we don't care. \
469 We also don't care what the reminder is, because the \
470 guard bit will be set anyway. -jj */ \
471 R##_f0 = -1; \
472 } \
473 else \
474 { \
475 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
476 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
477 _r_f0 = 0; \
478 if (_FP_FRAC_GT_2(_m, _r)) \
479 { \
480 R##_f0--; \
481 _FP_FRAC_ADD_2(_r, Y, _r); \
482 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
483 { \
484 R##_f0--; \
485 _FP_FRAC_ADD_2(_r, Y, _r); \
486 } \
487 } \
488 if (!_FP_FRAC_EQ_2(_r, _m)) \
489 R##_f0 |= _FP_WORK_STICKY; \
490 } \
491 } while (0)
492
493
494 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
495 do { \
496 _FP_W_TYPE _x[4], _y[2], _z[4]; \
497 _y[0] = Y##_f0; _y[1] = Y##_f1; \
498 _x[0] = _x[3] = 0; \
499 if (_FP_FRAC_GT_2(X, Y)) \
500 { \
501 R##_e++; \
502 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
503 X##_f1 >> (_FP_W_TYPE_SIZE - \
504 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
505 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
506 } \
507 else \
508 { \
509 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
510 X##_f1 >> (_FP_W_TYPE_SIZE - \
511 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
512 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
513 } \
514 \
515 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
516 R##_f1 = _z[1]; \
517 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
518 } while (0)
519
520
521 /*
522 * Square root algorithms:
523 * We have just one right now, maybe Newton approximation
524 * should be added for those machines where division is fast.
525 */
526
527 #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
528 do { \
529 while (q) \
530 { \
531 T##_f1 = S##_f1 + q; \
532 if (T##_f1 <= X##_f1) \
533 { \
534 S##_f1 = T##_f1 + q; \
535 X##_f1 -= T##_f1; \
536 R##_f1 += q; \
537 } \
538 _FP_FRAC_SLL_2(X, 1); \
539 q >>= 1; \
540 } \
541 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
542 while (q != _FP_WORK_ROUND) \
543 { \
544 T##_f0 = S##_f0 + q; \
545 T##_f1 = S##_f1; \
546 if (T##_f1 < X##_f1 || \
547 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
548 { \
549 S##_f0 = T##_f0 + q; \
550 S##_f1 += (T##_f0 > S##_f0); \
551 _FP_FRAC_DEC_2(X, T); \
552 R##_f0 += q; \
553 } \
554 _FP_FRAC_SLL_2(X, 1); \
555 q >>= 1; \
556 } \
557 if (X##_f0 | X##_f1) \
558 { \
559 if (S##_f1 < X##_f1 || \
560 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
561 R##_f0 |= _FP_WORK_ROUND; \
562 R##_f0 |= _FP_WORK_STICKY; \
563 } \
564 } while (0)
565
566
567 /*
568 * Assembly/disassembly for converting to/from integral types.
569 * No shifting or overflow handled here.
570 */
571
572 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
573 do { \
574 if (rsize <= _FP_W_TYPE_SIZE) \
575 r = X##_f0; \
576 else \
577 { \
578 r = X##_f1; \
579 r <<= _FP_W_TYPE_SIZE; \
580 r += X##_f0; \
581 } \
582 } while (0)
583
584 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
585 do { \
586 X##_f0 = r; \
587 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
588 } while (0)
589
590 /*
591 * Convert FP values between word sizes
592 */
593
594 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \
595 do { \
596 if (S##_c != FP_CLS_NAN) \
597 _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
598 _FP_WFRACBITS_##sfs); \
599 else \
600 _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
601 D##_f = S##_f0; \
602 } while (0)
603
604 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \
605 do { \
606 D##_f0 = S##_f; \
607 D##_f1 = 0; \
608 _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
609 } while (0)
610