]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/soft-fp/op-2.h
re PR other/53317 (Conversion from __int128 to __float128)
[thirdparty/gcc.git] / libgcc / soft-fp / op-2.h
CommitLineData
49721058
JM
1/* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
fa1e55b0 3 Copyright (C) 1997-2013 Free Software Foundation, Inc.
49721058
JM
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10 The GNU C Library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
14
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
23
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
b7633ee3
KT
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
49721058
JM
32
/* Two-word fraction primitives.  A fraction named X lives in two
   _FP_W_TYPE variables: X_f0 (selected by _FP_FRAC_LOW_2) and X_f1
   (selected by _FP_FRAC_HIGH_2).  */

/* Declare the two words of fraction X.  */
33#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
/* Copy both words of S into D; the expression's value is the new D_f1.  */
34#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
/* Set X from I, which must expand to two comma-separated values
   "I1, I0" (for example _FP_ZEROFRAC_2); __FP_FRAC_SET_2 stores I1 in
   X_f1 and I0 in X_f0.  */
35#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
/* Lvalue accessors for the X_f1 and X_f0 words.  */
36#define _FP_FRAC_HIGH_2(X) (X##_f1)
37#define _FP_FRAC_LOW_2(X) (X##_f0)
/* Select a word of X by pasting w onto X_f, so w must be the literal
   token 0 or 1 rather than an expression.  */
38#define _FP_FRAC_WORD_2(X,w) (X##_f##w)
39
/* Shift the two-word fraction X left by N bits, in place.
   - N < _FP_W_TYPE_SIZE: the top N bits of X_f0 are carried into the
     bottom of X_f1.  When N is the compile-time constant 1 the shift is
     done by doubling each word, with the top bit of X_f0 (detected as a
     negative _FP_WS_TYPE value) supplying the carry.
   - otherwise: X_f1 receives X_f0 shifted by N - _FP_W_TYPE_SIZE and
     X_f0 becomes zero.
   N is expanded several times, so it should be free of side effects.
   The whole expansion is cast to void and uses statement expressions.
   NOTE(review): the non-constant path shifts X_f0 right by
   _FP_W_TYPE_SIZE - N, a full-width shift when N == 0; presumably
   callers never pass 0 -- confirm.  */
40#define _FP_FRAC_SLL_2(X,N) \
41(void)(((N) < _FP_W_TYPE_SIZE) \
42 ? ({ \
43 if (__builtin_constant_p(N) && (N) == 1) \
44 { \
45 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
46 X##_f0 += X##_f0; \
47 } \
48 else \
49 { \
50 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51 X##_f0 <<= (N); \
52 } \
53 0; \
54 }) \
55 : ({ \
56 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
57 X##_f0 = 0; \
58 }))
59
60
/* Shift the two-word fraction X right by N bits, in place, discarding
   the bits shifted out.
   - N < _FP_W_TYPE_SIZE: the low N bits of X_f1 move into the top of
     X_f0.
   - otherwise: X_f0 receives X_f1 shifted by N - _FP_W_TYPE_SIZE and
     X_f1 becomes zero.
   N is expanded several times, so it should be free of side effects.
   NOTE(review): the first path shifts X_f1 left by
   _FP_W_TYPE_SIZE - N, a full-width shift when N == 0; presumably
   callers never pass 0 -- confirm.  */
61#define _FP_FRAC_SRL_2(X,N) \
62(void)(((N) < _FP_W_TYPE_SIZE) \
63 ? ({ \
64 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65 X##_f1 >>= (N); \
66 }) \
67 : ({ \
68 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
69 X##_f1 = 0; \
70 }))
71
72/* Right shift with sticky-lsb. */
/* Shift X right by N bits in place and store in S whether any of the
   bits shifted out were set (S is assigned 0 or 1; X itself is not
   OR-ed with the sticky value here).
   - N < _FP_W_TYPE_SIZE: S tests the low N bits of X_f0 (just bit 0
     when N is the compile-time constant 1).
   - otherwise: S tests all of X_f0 together with the low
     N - _FP_W_TYPE_SIZE bits of X_f1; the N == _FP_W_TYPE_SIZE case is
     special-cased to avoid shifting X_f1 by a full word.
   The sz parameter is not used by this expansion.  N is expanded
   several times.  */
73#define _FP_FRAC_SRST_2(X,S, N,sz) \
74(void)(((N) < _FP_W_TYPE_SIZE) \
75 ? ({ \
76 S = (__builtin_constant_p(N) && (N) == 1 \
77 ? X##_f0 & 1 \
78 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
79 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
80 X##_f1 >>= (N); \
81 }) \
82 : ({ \
83 S = ((((N) == _FP_W_TYPE_SIZE \
84 ? 0 \
85 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
86 | X##_f0) != 0); \
87 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
88 X##_f1 = 0; \
89 }))
90
/* Shift X right by N bits in place, OR-ing a sticky bit into bit 0 of
   X_f0: the bit is set when any of the bits shifted out were set.  The
   two arms mirror _FP_FRAC_SRST_2, except that the sticky value is
   merged into the result instead of being returned separately.
   The sz parameter is not used by this expansion.  N is expanded
   several times.  */
91#define _FP_FRAC_SRS_2(X,N,sz) \
92(void)(((N) < _FP_W_TYPE_SIZE) \
93 ? ({ \
94 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
95 (__builtin_constant_p(N) && (N) == 1 \
96 ? X##_f0 & 1 \
97 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
98 X##_f1 >>= (N); \
99 }) \
100 : ({ \
101 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
102 ((((N) == _FP_W_TYPE_SIZE \
103 ? 0 \
104 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
105 | X##_f0) != 0)); \
106 X##_f1 = 0; \
107 }))
108
/* Two-word arithmetic wrappers.  Each one splits its fraction arguments
   into (high, low) word pairs and forwards them to the matching
   __FP_FRAC_*_2 primitive defined further down in this file.  Going by
   the reference C definitions kept there under #if 0:
     ADDI: X += I  (I is a single word)
     ADD:  R = X + Y
     SUB:  R = X - Y
     DEC:  X -= Y
   with carry/borrow propagated from the low word to the high word.  */
109#define _FP_FRAC_ADDI_2(X,I) \
110 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
111
112#define _FP_FRAC_ADD_2(R,X,Y) \
113 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
114
115#define _FP_FRAC_SUB_2(R,X,Y) \
116 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
117
118#define _FP_FRAC_DEC_2(X,Y) \
119 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
120
/* Store in R the result of __FP_CLZ for the two-word fraction X: if
   X_f1 is nonzero, R is __FP_CLZ of X_f1; otherwise R is __FP_CLZ of
   X_f0 plus _FP_W_TYPE_SIZE.  __FP_CLZ is defined elsewhere
   (presumably a count-leading-zeros of one word -- confirm).  The
   expansion does not special-case X being zero in both words.  */
121#define _FP_FRAC_CLZ_2(R,X) \
122 do { \
123 if (X##_f1) \
124 __FP_CLZ(R,X##_f1); \
125 else \
126 { \
127 __FP_CLZ(R,X##_f0); \
128 R += _FP_W_TYPE_SIZE; \
129 } \
130 } while(0)
131
132/* Predicates */
/* True when X_f1, reinterpreted as the signed _FP_WS_TYPE, is negative
   (i.e. its top bit is set).  */
133#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
/* True when both words of X are zero.  */
134#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
/* Nonzero when the high word of X (selected via _FP_FRAC_HIGH_<fs>) has
   any bit of _FP_OVERFLOW_<fs> set; the value is the masked word, not a
   normalized 0/1.  */
135#define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
/* Clear the _FP_OVERFLOW_<fs> bits in the high word of X.  */
136#define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
/* Two-word comparisons: the high words decide, and the low words break
   a tie.  */
137#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
138#define _FP_FRAC_GT_2(X, Y) \
139 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
140#define _FP_FRAC_GE_2(X, Y) \
141 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
142
/* Two-value "high, low" initializers intended for _FP_FRAC_SET_2:
   all-zero, low word equal to 1, and both words all-ones (written as
   the complement of a zero _FP_WS_TYPE).  */
143#define _FP_ZEROFRAC_2 0, 0
144#define _FP_MINFRAC_2 0, 1
145#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
146
147/*
fa1e55b0 148 * Internals
49721058
JM
149 */
150
/* Assign the low word of X from I0 and the high word from I1.  Note the
   argument order is (X, high, low).  The value of the expression is the
   new X_f1.  */
151#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
152
/* Same computation as _FP_FRAC_CLZ_2, but taking the high word xh and
   low word xl as separate arguments: R is __FP_CLZ of xh when xh is
   nonzero, otherwise __FP_CLZ of xl plus _FP_W_TYPE_SIZE.  xh is
   expanded twice (test and count), so it should have no side
   effects.  */
153#define __FP_CLZ_2(R, xh, xl) \
154 do { \
155 if (xh) \
156 __FP_CLZ(R,xh); \
157 else \
158 { \
159 __FP_CLZ(R,xl); \
160 R += _FP_W_TYPE_SIZE; \
161 } \
162 } while(0)
163
/* Two-word add/subtract primitives taking (high, low) word pairs.
   The plain C definitions in the "#if 0" branch are compiled out and
   serve only as a reference for the intended arithmetic (the carry or
   borrow is detected by comparing the low-word result with an input).
   The "#else" branch is the one actually used: it unconditionally
   replaces any earlier definitions and forwards to add_ssaaaa and
   sub_ddmmss, which are not defined in this file (NOTE(review):
   presumably supplied by longlong.h -- confirm).  */
164#if 0
165
166#ifndef __FP_FRAC_ADDI_2
167#define __FP_FRAC_ADDI_2(xh, xl, i) \
168 (xh += ((xl += i) < i))
169#endif
170#ifndef __FP_FRAC_ADD_2
171#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
172 (rh = xh + yh + ((rl = xl + yl) < xl))
173#endif
174#ifndef __FP_FRAC_SUB_2
175#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
176 (rh = xh - yh - ((rl = xl - yl) > xl))
177#endif
178#ifndef __FP_FRAC_DEC_2
179#define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
180 do { \
181 UWtype _t = xl; \
182 xh -= yh + ((xl -= yl) > _t); \
183 } while (0)
184#endif
185
186#else
187
188#undef __FP_FRAC_ADDI_2
/* Add the single word i to the pair xh:xl, passing 0 as the high word
   of the addend.  */
189#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
190#undef __FP_FRAC_ADD_2
191#define __FP_FRAC_ADD_2 add_ssaaaa
192#undef __FP_FRAC_SUB_2
193#define __FP_FRAC_SUB_2 sub_ddmmss
194#undef __FP_FRAC_DEC_2
/* In-place subtract: xh:xl is both the minuend and the destination.  */
195#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
196
197#endif
198
199/*
200 * Unpack the raw bits of a native fp value. Do not classify or
201 * normalize the data.
202 */
203
/* Unpack the value val of format fs into X.  val is stored through the
   flt member of a local union _FP_UNION_<fs>, and the bits.frac0,
   bits.frac1, bits.exp and bits.sign fields are then copied to X_f0,
   X_f1, X_e and X_s respectively.  The caller must have declared those
   four variables.  */
204#define _FP_UNPACK_RAW_2(fs, X, val) \
205 do { \
206 union _FP_UNION_##fs _flo; _flo.flt = (val); \
207 \
208 X##_f0 = _flo.bits.frac0; \
209 X##_f1 = _flo.bits.frac1; \
210 X##_e = _flo.bits.exp; \
211 X##_s = _flo.bits.sign; \
212 } while (0)
213
/* Pointer variant of _FP_UNPACK_RAW_2: val is a pointer that is cast to
   union _FP_UNION_<fs> * and the fields are read in place, without
   copying the value first.  The pointed-to object must therefore be
   validly accessible as that union.  */
214#define _FP_UNPACK_RAW_2_P(fs, X, val) \
215 do { \
216 union _FP_UNION_##fs *_flo = \
217 (union _FP_UNION_##fs *)(val); \
218 \
219 X##_f0 = _flo->bits.frac0; \
220 X##_f1 = _flo->bits.frac1; \
221 X##_e = _flo->bits.exp; \
222 X##_s = _flo->bits.sign; \
223 } while (0)
224
225
226/*
227 * Repack the raw bits of a native fp value.
228 */
229
/* Pack X into val (an lvalue of format fs).  X_f0, X_f1, X_e and X_s
   are written to the bits.frac0, bits.frac1, bits.exp and bits.sign
   fields of a local union _FP_UNION_<fs>, whose flt member is then
   assigned to val.  Only those four fields are written before flt is
   read.  */
230#define _FP_PACK_RAW_2(fs, val, X) \
231 do { \
232 union _FP_UNION_##fs _flo; \
233 \
234 _flo.bits.frac0 = X##_f0; \
235 _flo.bits.frac1 = X##_f1; \
236 _flo.bits.exp = X##_e; \
237 _flo.bits.sign = X##_s; \
238 \
239 (val) = _flo.flt; \
240 } while (0)
241
/* Pointer variant of _FP_PACK_RAW_2: val is a pointer that is cast to
   union _FP_UNION_<fs> * and the four bit-fields are stored directly
   into the pointed-to object.  */
242#define _FP_PACK_RAW_2_P(fs, val, X) \
243 do { \
244 union _FP_UNION_##fs *_flo = \
245 (union _FP_UNION_##fs *)(val); \
246 \
247 _flo->bits.frac0 = X##_f0; \
248 _flo->bits.frac1 = X##_f1; \
249 _flo->bits.exp = X##_e; \
250 _flo->bits.sign = X##_s; \
251 } while (0)
252
253
254/*
255 * Multiplication algorithms:
256 */
257
258/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
259
/* R = X * Y on two-word fractions, using four invocations of doit.
   doit is called as doit (p1, p0, a, b); presumably it stores the
   high and low words of a * b in p1 and p0 -- confirm against the
   primitive supplied by the caller.
   The X_f0*Y_f0 and X_f1*Y_f1 products are written straight into words
   1:0 and 3:2 of the four-word temporary _z; the two cross products
   (_b, _c) are then added into words 3..1 with __FP_FRAC_ADD_3.  _z is
   finally shifted right, with sticky, by wfracbits-1 and its two low
   words become R.
   NOTE(review): wfracbits is used unparenthesized ("wfracbits-1",
   "2*wfracbits"), so it should be passed as a single token or a
   parenthesized expression.  */
260#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
261 do { \
262 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
263 \
264 doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
265 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
266 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
267 doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
268 \
269 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
270 _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
271 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
272 _FP_FRAC_WORD_4(_z,1)); \
273 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
274 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
275 _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
276 _FP_FRAC_WORD_4(_z,1)); \
277 \
278 /* Normalize since we know where the msb of the multiplicands \
279 were (bit B), we know that the msb of the product is \
280 at either 2B or 2B-1. */ \
281 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
282 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
283 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
284 } while (0)
285
286/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
287 Do only 3 multiplications instead of four. This one is for machines
288 where multiplication is much more expensive than subtraction. */
289
/* R = X * Y on two-word fractions, using three invocations of doit
   instead of four.
   The word sums X_f0+X_f1 and Y_f0+Y_f1 are formed in _b_f0 and _b_f1,
   with their carry-outs recorded in _c1 and _c2.  doit is applied to
   the low words, to the two sums and to the high words; the remaining
   add/subtract steps combine those three products (and the carry
   corrections derived from _c1/_c2) into the four-word temporary _z.
   _z is then shifted right, with sticky, by wfracbits-1 and its two low
   words become R.
   NOTE(review): wfracbits is used unparenthesized ("wfracbits-1",
   "2*wfracbits"), so it should be passed as a single token or a
   parenthesized expression.  */
290#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
291 do { \
292 _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
293 _FP_W_TYPE _d; \
294 int _c1, _c2; \
295 \
296 _b_f0 = X##_f0 + X##_f1; \
297 _c1 = _b_f0 < X##_f0; \
298 _b_f1 = Y##_f0 + Y##_f1; \
299 _c2 = _b_f1 < Y##_f0; \
300 doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
301 doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
302 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
303 \
 /* Keep each sum only when the other sum overflowed (mask is 0 or \
    all-ones).  */ \
304 _b_f0 &= -_c2; \
305 _b_f1 &= -_c1; \
306 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
307 _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
308 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
309 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
310 _b_f0); \
311 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
312 _b_f1); \
313 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
314 _FP_FRAC_WORD_4(_z,1), \
315 0, _d, _FP_FRAC_WORD_4(_z,0)); \
316 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
317 _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
318 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
319 _c_f1, _c_f0, \
320 _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
321 \
322 /* Normalize since we know where the msb of the multiplicands \
323 were (bit B), we know that the msb of the product is \
324 at either 2B or 2B-1. */ \
325 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
326 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
327 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
328 } while (0)
329
/* R = X * Y on two-word fractions via mpn_mul_n.  X and Y are copied
   into two-element arrays (index 0 = low word) and multiplied into the
   four-word temporary _z, which is accessed directly as the array _z_f;
   this relies on _FP_FRAC_DECL_4 declaring such an array.  _z is then
   shifted right, with sticky, by wfracbits-1 and its two low words
   become R.
   NOTE(review): wfracbits is used unparenthesized, so it should be
   passed as a single token or a parenthesized expression.  */
330#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
331 do { \
332 _FP_FRAC_DECL_4(_z); \
333 _FP_W_TYPE _x[2], _y[2]; \
334 _x[0] = X##_f0; _x[1] = X##_f1; \
335 _y[0] = Y##_f0; _y[1] = Y##_f1; \
336 \
337 mpn_mul_n(_z_f, _x, _y, 2); \
338 \
339 /* Normalize since we know where the msb of the multiplicands \
340 were (bit B), we know that the msb of the product is \
341 at either 2B or 2B-1. */ \
342 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
343 R##_f0 = _z_f[0]; \
344 R##_f1 = _z_f[1]; \
345 } while (0)
346
347/* Do at most 120x120=240 bits multiplication using double floating
348 point multiplication. This is useful if floating point
349 multiplication has much bigger throughput than integer multiply.
350 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
fa1e55b0 351 between 106 and 120 only.
49721058
JM
352 Caller guarantees that X and Y have (1LLL << (wfracbits - 1)) set.
353 SETFETZ is a macro which will disable all FPU exceptions and set rounding
354 towards zero, RESETFE should optionally reset it back. */
355
/* R = X * Y computed with double-precision floating point.
   Outline of the expansion:
   - abort () is called if wfracbits is outside 106..120;
   - setfetz is expanded before, and resetfe after, the floating-point
     work;
   - each operand is cut into five limbs with 24-bit masks (_a240 is
     the top limb of X and _e240 the bottom one; _f240.._j240 likewise
     for Y), converted to double and scaled by the powers of two in
     _const[0..3];
   - the limb products are summed per column into _k240 (top) through
     _s240 (bottom), and each column is then added into the next higher
     one;
   - the four lowest columns (_p240.._s240) only decide the sticky bit
     _y240: each is reduced with the "(c + x) - c" idiom and tested
     against zero;
   - the next four columns have a constant added and are read back
     through the integer member of their union, of which the low 24
     bits are used (NOTE(review): presumably the added constant places
     those 24 bits at the bottom of the mantissa -- confirm); _k240 is
     converted with a DItype cast;
   - R_f1 and R_f0 are assembled from those pieces with shifts that
     depend on wfracbits, and _y240 is OR-ed into R_f0.  */
356#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
357 do { \
358 static const double _const[] = { \
359 /* 2^-24 */ 5.9604644775390625e-08, \
360 /* 2^-48 */ 3.5527136788005009e-15, \
361 /* 2^-72 */ 2.1175823681357508e-22, \
362 /* 2^-96 */ 1.2621774483536189e-29, \
363 /* 2^28 */ 2.68435456e+08, \
364 /* 2^4 */ 1.600000e+01, \
365 /* 2^-20 */ 9.5367431640625e-07, \
366 /* 2^-44 */ 5.6843418860808015e-14, \
367 /* 2^-68 */ 3.3881317890172014e-21, \
368 /* 2^-92 */ 2.0194839173657902e-28, \
369 /* 2^-116 */ 1.2037062152420224e-35}; \
370 double _a240, _b240, _c240, _d240, _e240, _f240, \
371 _g240, _h240, _i240, _j240, _k240; \
372 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
373 _p240, _q240, _r240, _s240; \
374 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
375 \
376 if (wfracbits < 106 || wfracbits > 120) \
377 abort(); \
378 \
379 setfetz; \
380 \
381 _e240 = (double)(long)(X##_f0 & 0xffffff); \
382 _j240 = (double)(long)(Y##_f0 & 0xffffff); \
383 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
384 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
385 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
386 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
387 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
388 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
389 _a240 = (double)(long)(X##_f1 >> 32); \
390 _f240 = (double)(long)(Y##_f1 >> 32); \
391 _e240 *= _const[3]; \
392 _j240 *= _const[3]; \
393 _d240 *= _const[2]; \
394 _i240 *= _const[2]; \
395 _c240 *= _const[1]; \
396 _h240 *= _const[1]; \
397 _b240 *= _const[0]; \
398 _g240 *= _const[0]; \
399 _s240.d = _e240*_j240;\
400 _r240.d = _d240*_j240 + _e240*_i240;\
401 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
402 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
403 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
404 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
405 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
406 _l240.d = _a240*_g240 + _b240*_f240; \
407 _k240 = _a240*_f240; \
408 _r240.d += _s240.d; \
409 _q240.d += _r240.d; \
410 _p240.d += _q240.d; \
411 _o240.d += _p240.d; \
412 _n240.d += _o240.d; \
413 _m240.d += _n240.d; \
414 _l240.d += _m240.d; \
415 _k240 += _l240.d; \
416 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
417 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
418 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
419 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
420 _o240.d += _const[7]; \
421 _n240.d += _const[6]; \
422 _m240.d += _const[5]; \
423 _l240.d += _const[4]; \
424 if (_s240.d != 0.0) _y240 = 1; \
425 if (_r240.d != 0.0) _y240 = 1; \
426 if (_q240.d != 0.0) _y240 = 1; \
427 if (_p240.d != 0.0) _y240 = 1; \
428 _t240 = (DItype)_k240; \
429 _u240 = _l240.i; \
430 _v240 = _m240.i; \
431 _w240 = _n240.i; \
432 _x240 = _o240.i; \
433 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
434 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
435 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
fa1e55b0
JM
436 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
437 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
438 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
439 | _y240; \
49721058
JM
440 resetfe; \
441 } while (0)
442
443/*
444 * Division algorithms:
445 */
446
/* R = X / Y on two-word fractions, using udiv_qrnnd and umul_ppmm
   (both defined elsewhere).
   - A three-word numerator _n is built from X: when X > Y it is X
     shifted right by one bit; otherwise it is X unshifted and R_e is
     decremented.
   - Y is shifted left in place by _FP_WFRACXBITS_<fs>, so the caller's
     Y is modified.
   - The high quotient word R_f1 is estimated by dividing the top two
     numerator words by Y_f1, then corrected downwards at most twice
     using the product of the estimate with Y_f0.
   - The low quotient word R_f0 is produced the same way from the
     remainder, except when the remainder's high word equals Y_f1 (see
     the comment in that branch).  _FP_WORK_STICKY is OR-ed into R_f0
     when the final remainder _r differs from _m.  */
447#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
448 do { \
449 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
450 if (_FP_FRAC_GT_2(X, Y)) \
451 { \
452 _n_f2 = X##_f1 >> 1; \
453 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
454 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
455 } \
456 else \
457 { \
458 R##_e--; \
459 _n_f2 = X##_f1; \
460 _n_f1 = X##_f0; \
461 _n_f0 = 0; \
462 } \
463 \
464 /* Normalize, i.e. make the most significant bit of the \
465 denominator set. */ \
466 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
467 \
468 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
469 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
470 _r_f0 = _n_f0; \
471 if (_FP_FRAC_GT_2(_m, _r)) \
472 { \
473 R##_f1--; \
474 _FP_FRAC_ADD_2(_r, Y, _r); \
475 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
476 { \
477 R##_f1--; \
478 _FP_FRAC_ADD_2(_r, Y, _r); \
479 } \
480 } \
481 _FP_FRAC_DEC_2(_r, _m); \
482 \
483 if (_r_f1 == Y##_f1) \
484 { \
485 /* This is a special case, not an optimization \
486 (_r/Y##_f1 would not fit into UWtype). \
487 As _r is guaranteed to be < Y, R##_f0 can be either \
488 (UWtype)-1 or (UWtype)-2. But as we know what kind \
489 of bits it is (sticky, guard, round), we don't care. \
490 We also don't care what the remainder is, because the \
491 guard bit will be set anyway. -jj */ \
492 R##_f0 = -1; \
493 } \
494 else \
495 { \
496 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
497 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
498 _r_f0 = 0; \
499 if (_FP_FRAC_GT_2(_m, _r)) \
500 { \
501 R##_f0--; \
502 _FP_FRAC_ADD_2(_r, Y, _r); \
503 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
504 { \
505 R##_f0--; \
506 _FP_FRAC_ADD_2(_r, Y, _r); \
507 } \
508 } \
509 if (!_FP_FRAC_EQ_2(_r, _m)) \
510 R##_f0 |= _FP_WORK_STICKY; \
511 } \
512 } while (0)
513
514
/* R = X / Y on two-word fractions via mpn_divrem.
   A four-word dividend _x is built with words 0 and 3 zero and X placed
   in words 1 and 2, shifted left by _FP_WFRACBITS_<fs>-1 minus one word
   when X > Y (R_e is then incremented) and by _FP_WFRACBITS_<fs> minus
   one word otherwise.  mpn_divrem (_z, 0, _x, 4, _y, 2) is called and
   R takes _z[1] and _z[0]; bit 0 of R_f0 is additionally set when
   _x[0] | _x[1] is nonzero after the call (NOTE(review): presumably
   mpn_divrem leaves the remainder in _x -- confirm).
   NOTE(review): the dividend words look transposed -- _x[1] receives
   "X_f0 << k | X_f1 >> (W - k)" and _x[2] receives "X_f1 << k", whereas
   a two-word left shift would put "X_f1 << k | X_f0 >> (W - k)" in the
   upper word and "X_f0 << k" in the lower.  Verify before relying on
   this variant.  Also note the exponent adjustment here (increment when
   X > Y) differs from _FP_DIV_MEAT_2_udiv (decrement otherwise).  */
515#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
516 do { \
517 _FP_W_TYPE _x[4], _y[2], _z[4]; \
518 _y[0] = Y##_f0; _y[1] = Y##_f1; \
519 _x[0] = _x[3] = 0; \
520 if (_FP_FRAC_GT_2(X, Y)) \
521 { \
522 R##_e++; \
523 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
524 X##_f1 >> (_FP_W_TYPE_SIZE - \
525 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
526 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
527 } \
528 else \
529 { \
530 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
531 X##_f1 >> (_FP_W_TYPE_SIZE - \
532 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
533 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
534 } \
535 \
536 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
537 R##_f1 = _z[1]; \
538 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
539 } while (0)
540
541
542/*
543 * Square root algorithms:
544 * We have just one right now, maybe Newton approximation
545 * should be added for those machines where division is fast.
546 */
fa1e55b0 547
49721058
JM
/* Bit-at-a-time square root step on two-word fractions.
   R accumulates the result bits, S and T are working fractions, X is
   the running remainder (shifted left by one bit after every step) and
   q is the single-bit mask of the result bit currently being tried.
   All of R, S, T, X and q are modified.
   - First loop: works on the high words only, from the incoming q down
     until q has been shifted to zero.
   - Second loop: restarts q at the top bit of a word and works on the
     low word (with carries into S_f1), stopping when q reaches
     _FP_WORK_ROUND.
   - Finally, if the remainder X is nonzero, _FP_WORK_STICKY is set in
     R_f0, and _FP_WORK_ROUND is also set when S < X.
   The initial values of R, S and q are the caller's responsibility.  */
548#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
549 do { \
550 while (q) \
551 { \
552 T##_f1 = S##_f1 + q; \
553 if (T##_f1 <= X##_f1) \
554 { \
555 S##_f1 = T##_f1 + q; \
556 X##_f1 -= T##_f1; \
557 R##_f1 += q; \
558 } \
559 _FP_FRAC_SLL_2(X, 1); \
560 q >>= 1; \
561 } \
562 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
563 while (q != _FP_WORK_ROUND) \
564 { \
565 T##_f0 = S##_f0 + q; \
566 T##_f1 = S##_f1; \
567 if (T##_f1 < X##_f1 || \
568 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
569 { \
570 S##_f0 = T##_f0 + q; \
 /* Carry into the high word when the low-word sum wrapped.  */ \
571 S##_f1 += (T##_f0 > S##_f0); \
572 _FP_FRAC_DEC_2(X, T); \
573 R##_f0 += q; \
574 } \
575 _FP_FRAC_SLL_2(X, 1); \
576 q >>= 1; \
577 } \
578 if (X##_f0 | X##_f1) \
579 { \
580 if (S##_f1 < X##_f1 || \
581 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
582 R##_f0 |= _FP_WORK_ROUND; \
583 R##_f0 |= _FP_WORK_STICKY; \
584 } \
585 } while (0)
586
587
588/*
fa1e55b0 589 * Assembly/disassembly for converting to/from integral types.
49721058
JM
590 * No shifting or overflow handled here.
591 */
592
/* Assemble the integer lvalue r from the two-word fraction X.
   rsize is the width of r in bits.  When rsize is at most
   _FP_W_TYPE_SIZE only the low word X_f0 is stored; otherwise r becomes
   (X_f1 << _FP_W_TYPE_SIZE) + X_f0.  No other shifting or overflow
   handling is done.

   r and rsize are parenthesized so that expression arguments (for
   example a conditional expression for rsize) keep their meaning.  The
   shift in the wide arm is guarded by the same size test: that arm is
   still compiled when r is only one word wide, and an unguarded
   full-width shift there would be flagged even though it is never
   executed.  r and rsize are expanded more than once.  */
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)			\
  (void) (((rsize) <= _FP_W_TYPE_SIZE)				\
	  ? ({ (r) = X##_f0; })					\
	  : ({							\
	      (r) = X##_f1;					\
	      (r) = ((rsize) <= _FP_W_TYPE_SIZE			\
		     ? 0					\
		     : (r) << _FP_W_TYPE_SIZE);			\
	      (r) += X##_f0;					\
	    }))
601
/* Split the integer value r into the two-word fraction X.
   rsize is the width of r in bits.  X_f0 receives r (converted to the
   word type) and X_f1 receives r >> _FP_W_TYPE_SIZE, or 0 when rsize is
   at most _FP_W_TYPE_SIZE.  No other shifting or overflow handling is
   done.

   r and rsize are parenthesized: without that, an argument such as
   "a | b" would expand to "a | b >> _FP_W_TYPE_SIZE" and shift only b.
   r is expanded twice, so it should have no side effects.  */
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)			\
  do								\
    {								\
      X##_f0 = (r);						\
      X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE			\
		? 0						\
		: (r) >> _FP_W_TYPE_SIZE);			\
    }								\
  while (0)
607
608/*
609 * Convert FP values between word sizes
610 */
611
/* Narrow a two-word fraction S into a one-word fraction D (held in
   D_f): only the low word S_f0 is copied, and S_f1 is dropped.  */
612#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
613
/* Widen a one-word fraction S (held in S_f) into a two-word fraction D:
   the low word takes S_f and the high word is cleared.  */
614#define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
13cc6d1b
JM
615
/* Same-width copy, provided for symmetry with the _1_2 and _2_1
   converters; it simply forwards to _FP_FRAC_COPY_2.  */
616#define _FP_FRAC_COPY_2_2(D,S) _FP_FRAC_COPY_2(D,S)