]> git.ipfire.org Git - thirdparty/glibc.git/blame - soft-fp/op-2.h
hurd: Fix build
[thirdparty/glibc.git] / soft-fp / op-2.h
CommitLineData
d876f532
UD
1/* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
04277e02 3 Copyright (C) 1997-2019 Free Software Foundation, Inc.
d876f532
UD
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
d876f532 14
638a783c
RM
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
23
d876f532
UD
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 27 Lesser General Public License for more details.
d876f532 28
41bdb6e2 29 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
d876f532 32
a2f8be9c
JM
33#ifndef SOFT_FP_OP_2_H
34#define SOFT_FP_OP_2_H 1
35
b838844b
JM
36#define _FP_FRAC_DECL_2(X) \
37 _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
51ca9e29
JM
38#define _FP_FRAC_COPY_2(D, S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
39#define _FP_FRAC_SET_2(X, I) __FP_FRAC_SET_2 (X, I)
d876f532
UD
40#define _FP_FRAC_HIGH_2(X) (X##_f1)
41#define _FP_FRAC_LOW_2(X) (X##_f0)
51ca9e29
JM
42#define _FP_FRAC_WORD_2(X, w) (X##_f##w)
43
44#define _FP_FRAC_SLL_2(X, N) \
45 (void) (((N) < _FP_W_TYPE_SIZE) \
46 ? ({ \
47 if (__builtin_constant_p (N) && (N) == 1) \
48 { \
49 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
50 X##_f0 += X##_f0; \
51 } \
52 else \
53 { \
54 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
55 X##_f0 <<= (N); \
56 } \
57 0; \
58 }) \
59 : ({ \
60 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
61 X##_f0 = 0; \
62 }))
63
64
65#define _FP_FRAC_SRL_2(X, N) \
66 (void) (((N) < _FP_W_TYPE_SIZE) \
67 ? ({ \
68 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
69 X##_f1 >>= (N); \
70 }) \
71 : ({ \
72 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
73 X##_f1 = 0; \
74 }))
d876f532
UD
75
76/* Right shift with sticky-lsb. */
51ca9e29
JM
77#define _FP_FRAC_SRST_2(X, S, N, sz) \
78 (void) (((N) < _FP_W_TYPE_SIZE) \
79 ? ({ \
80 S = (__builtin_constant_p (N) && (N) == 1 \
81 ? X##_f0 & 1 \
82 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
83 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
84 X##_f1 >>= (N); \
85 }) \
86 : ({ \
87 S = ((((N) == _FP_W_TYPE_SIZE \
88 ? 0 \
89 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
90 | X##_f0) != 0); \
91 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
92 X##_f1 = 0; \
93 }))
94
95#define _FP_FRAC_SRS_2(X, N, sz) \
96 (void) (((N) < _FP_W_TYPE_SIZE) \
97 ? ({ \
98 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
99 | (__builtin_constant_p (N) && (N) == 1 \
100 ? X##_f0 & 1 \
101 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
102 X##_f1 >>= (N); \
103 }) \
104 : ({ \
105 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
106 | ((((N) == _FP_W_TYPE_SIZE \
107 ? 0 \
108 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
109 | X##_f0) != 0)); \
110 X##_f1 = 0; \
111 }))
112
113#define _FP_FRAC_ADDI_2(X, I) \
114 __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
115
116#define _FP_FRAC_ADD_2(R, X, Y) \
117 __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
118
119#define _FP_FRAC_SUB_2(R, X, Y) \
120 __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
121
122#define _FP_FRAC_DEC_2(X, Y) \
123 __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
124
125#define _FP_FRAC_CLZ_2(R, X) \
1e145589
JM
126 do \
127 { \
128 if (X##_f1) \
5c0508a3 129 __FP_CLZ ((R), X##_f1); \
1e145589
JM
130 else \
131 { \
5c0508a3
JM
132 __FP_CLZ ((R), X##_f0); \
133 (R) += _FP_W_TYPE_SIZE; \
1e145589
JM
134 } \
135 } \
51ca9e29 136 while (0)
d876f532 137
c4fe3ea7 138/* Predicates. */
51ca9e29 139#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) X##_f1 < 0)
d876f532 140#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
51ca9e29
JM
141#define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
142#define _FP_FRAC_CLEAR_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
143#define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
144 (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
d876f532
UD
145#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
146#define _FP_FRAC_GT_2(X, Y) \
fe0b1e85 147 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
d876f532 148#define _FP_FRAC_GE_2(X, Y) \
fe0b1e85 149 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
d876f532
UD
150
151#define _FP_ZEROFRAC_2 0, 0
152#define _FP_MINFRAC_2 0, 1
51ca9e29 153#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
d876f532 154
c4fe3ea7 155/* Internals. */
d876f532 156
51ca9e29 157#define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = I0, X##_f1 = I1)
d876f532 158
1e145589
JM
159#define __FP_CLZ_2(R, xh, xl) \
160 do \
161 { \
162 if (xh) \
5c0508a3 163 __FP_CLZ ((R), xh); \
1e145589
JM
164 else \
165 { \
5c0508a3
JM
166 __FP_CLZ ((R), xl); \
167 (R) += _FP_W_TYPE_SIZE; \
1e145589
JM
168 } \
169 } \
51ca9e29 170 while (0)
d876f532
UD
171
172#if 0
173
71b4dea7
JM
174# ifndef __FP_FRAC_ADDI_2
175# define __FP_FRAC_ADDI_2(xh, xl, i) \
d876f532 176 (xh += ((xl += i) < i))
71b4dea7
JM
177# endif
178# ifndef __FP_FRAC_ADD_2
179# define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
d876f532 180 (rh = xh + yh + ((rl = xl + yl) < xl))
71b4dea7
JM
181# endif
182# ifndef __FP_FRAC_SUB_2
183# define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
d876f532 184 (rh = xh - yh - ((rl = xl - yl) > xl))
71b4dea7
JM
185# endif
186# ifndef __FP_FRAC_DEC_2
3a6e9887
JM
187# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
188 do \
189 { \
190 UWtype __FP_FRAC_DEC_2_t = xl; \
191 xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
192 } \
1e145589 193 while (0)
71b4dea7 194# endif
d876f532
UD
195
196#else
197
71b4dea7 198# undef __FP_FRAC_ADDI_2
51ca9e29 199# define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa (xh, xl, xh, xl, 0, i)
71b4dea7
JM
200# undef __FP_FRAC_ADD_2
201# define __FP_FRAC_ADD_2 add_ssaaaa
202# undef __FP_FRAC_SUB_2
203# define __FP_FRAC_SUB_2 sub_ddmmss
204# undef __FP_FRAC_DEC_2
205# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
51ca9e29 206 sub_ddmmss (xh, xl, xh, xl, yh, yl)
d876f532
UD
207
208#endif
209
c4fe3ea7
JM
210/* Unpack the raw bits of a native fp value. Do not classify or
211 normalize the data. */
d876f532 212
3a6e9887
JM
213#define _FP_UNPACK_RAW_2(fs, X, val) \
214 do \
215 { \
216 union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
217 _FP_UNPACK_RAW_2_flo.flt = (val); \
218 \
219 X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
220 X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
221 X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
222 X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
223 } \
1e145589
JM
224 while (0)
225
3a6e9887
JM
226#define _FP_UNPACK_RAW_2_P(fs, X, val) \
227 do \
228 { \
229 union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
230 = (union _FP_UNION_##fs *) (val); \
231 \
232 X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
233 X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
234 X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
235 X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
236 } \
1e145589 237 while (0)
d876f532
UD
238
239
c4fe3ea7 240/* Repack the raw bits of a native fp value. */
d876f532 241
1e145589
JM
242#define _FP_PACK_RAW_2(fs, val, X) \
243 do \
244 { \
3a6e9887 245 union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
1e145589 246 \
3a6e9887
JM
247 _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
248 _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
249 _FP_PACK_RAW_2_flo.bits.exp = X##_e; \
250 _FP_PACK_RAW_2_flo.bits.sign = X##_s; \
1e145589 251 \
3a6e9887 252 (val) = _FP_PACK_RAW_2_flo.flt; \
1e145589
JM
253 } \
254 while (0)
255
3a6e9887
JM
256#define _FP_PACK_RAW_2_P(fs, val, X) \
257 do \
258 { \
259 union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
260 = (union _FP_UNION_##fs *) (val); \
261 \
262 _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
263 _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
264 _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
265 _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
266 } \
1e145589 267 while (0)
d876f532
UD
268
269
c4fe3ea7 270/* Multiplication algorithms: */
d876f532
UD
271
272/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
273
77f01ab5 274#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
1e145589
JM
275 do \
276 { \
3a6e9887
JM
277 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
278 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
d876f532 279 \
3a6e9887
JM
280 doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
281 X##_f0, Y##_f0); \
282 doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
283 X##_f0, Y##_f1); \
284 doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
285 X##_f1, Y##_f0); \
286 doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
287 X##_f1, Y##_f1); \
d876f532 288 \
51ca9e29 289 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
3a6e9887
JM
290 _FP_FRAC_WORD_4 (R, 1), 0, \
291 _FP_MUL_MEAT_DW_2_wide_b_f1, \
292 _FP_MUL_MEAT_DW_2_wide_b_f0, \
51ca9e29
JM
293 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
294 _FP_FRAC_WORD_4 (R, 1)); \
295 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
3a6e9887
JM
296 _FP_FRAC_WORD_4 (R, 1), 0, \
297 _FP_MUL_MEAT_DW_2_wide_c_f1, \
298 _FP_MUL_MEAT_DW_2_wide_c_f0, \
51ca9e29
JM
299 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
300 _FP_FRAC_WORD_4 (R, 1)); \
1e145589
JM
301 } \
302 while (0)
77f01ab5
JM
303
304#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
1e145589
JM
305 do \
306 { \
3a6e9887 307 _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
77f01ab5 308 \
5c0508a3 309 _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
3a6e9887 310 X, Y, doit); \
d876f532 311 \
1e145589
JM
312 /* Normalize since we know where the msb of the multiplicands \
313 were (bit B), we know that the msb of the of the product is \
314 at either 2B or 2B-1. */ \
5c0508a3
JM
315 _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
316 2*(wfracbits)); \
3a6e9887
JM
317 R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
318 R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
1e145589
JM
319 } \
320 while (0)
d876f532
UD
321
322/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
323 Do only 3 multiplications instead of four. This one is for machines
324 where multiplication is much more expensive than subtraction. */
325
77f01ab5 326#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
1e145589
JM
327 do \
328 { \
3a6e9887
JM
329 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
330 _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
331 _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
332 int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
333 int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
d876f532 334 \
3a6e9887
JM
335 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
336 _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
337 = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
338 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
339 _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
340 = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
341 doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
342 X##_f0, Y##_f0); \
343 doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
344 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
345 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
346 doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
347 _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
d876f532 348 \
3a6e9887
JM
349 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
350 &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
351 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
352 &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
51ca9e29 353 __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
3a6e9887
JM
354 _FP_FRAC_WORD_4 (R, 1), \
355 (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
356 & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
357 _FP_MUL_MEAT_DW_2_wide_3mul_d, \
51ca9e29
JM
358 0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
359 __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
3a6e9887 360 _FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
51ca9e29 361 __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
3a6e9887 362 _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
51ca9e29
JM
363 __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
364 _FP_FRAC_WORD_4 (R, 1), \
3a6e9887
JM
365 0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
366 _FP_FRAC_WORD_4 (R, 0)); \
51ca9e29 367 __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
3a6e9887
JM
368 _FP_FRAC_WORD_4 (R, 1), 0, \
369 _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
370 _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
51ca9e29 371 __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
3a6e9887
JM
372 _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
373 _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
51ca9e29 374 _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
1e145589
JM
375 } \
376 while (0)
77f01ab5
JM
377
378#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
1e145589
JM
379 do \
380 { \
3a6e9887 381 _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
d876f532 382 \
5c0508a3 383 _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
3a6e9887
JM
384 _FP_MUL_MEAT_2_wide_3mul_z, \
385 X, Y, doit); \
d876f532 386 \
1e145589
JM
387 /* Normalize since we know where the msb of the multiplicands \
388 were (bit B), we know that the msb of the of the product is \
389 at either 2B or 2B-1. */ \
3a6e9887 390 _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
5c0508a3 391 (wfracbits)-1, 2*(wfracbits)); \
3a6e9887
JM
392 R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
393 R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
1e145589
JM
394 } \
395 while (0)
396
397#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
398 do \
399 { \
3a6e9887
JM
400 _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
401 _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
402 _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
403 _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
404 _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
405 _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
1e145589 406 \
3a6e9887
JM
407 mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
408 _FP_MUL_MEAT_DW_2_gmp_y, 2); \
1e145589
JM
409 } \
410 while (0)
77f01ab5
JM
411
412#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
1e145589
JM
413 do \
414 { \
3a6e9887 415 _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
77f01ab5 416 \
5c0508a3 417 _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
d876f532 418 \
1e145589
JM
419 /* Normalize since we know where the msb of the multiplicands \
420 were (bit B), we know that the msb of the of the product is \
421 at either 2B or 2B-1. */ \
5c0508a3
JM
422 _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
423 2*(wfracbits)); \
3a6e9887
JM
424 R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
425 R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
1e145589
JM
426 } \
427 while (0)
d876f532
UD
428
429/* Do at most 120x120=240 bits multiplication using double floating
430 point multiplication. This is useful if floating point
431 multiplication has much bigger throughput than integer multiply.
432 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
9c84384c 433 between 106 and 120 only.
d876f532
UD
434 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
435 SETFETZ is a macro which will disable all FPU exceptions and set rounding
436 towards zero, RESETFE should optionally reset it back. */
437
1e145589
JM
438#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
439 do \
440 { \
441 static const double _const[] = \
442 { \
443 /* 2^-24 */ 5.9604644775390625e-08, \
444 /* 2^-48 */ 3.5527136788005009e-15, \
445 /* 2^-72 */ 2.1175823681357508e-22, \
446 /* 2^-96 */ 1.2621774483536189e-29, \
447 /* 2^28 */ 2.68435456e+08, \
448 /* 2^4 */ 1.600000e+01, \
449 /* 2^-20 */ 9.5367431640625e-07, \
450 /* 2^-44 */ 5.6843418860808015e-14, \
451 /* 2^-68 */ 3.3881317890172014e-21, \
452 /* 2^-92 */ 2.0194839173657902e-28, \
453 /* 2^-116 */ 1.2037062152420224e-35 \
454 }; \
455 double _a240, _b240, _c240, _d240, _e240, _f240, \
456 _g240, _h240, _i240, _j240, _k240; \
457 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
458 _p240, _q240, _r240, _s240; \
459 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
460 \
7d67a196
JM
461 _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120, \
462 "wfracbits out of range"); \
1e145589
JM
463 \
464 setfetz; \
465 \
51ca9e29
JM
466 _e240 = (double) (long) (X##_f0 & 0xffffff); \
467 _j240 = (double) (long) (Y##_f0 & 0xffffff); \
468 _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
469 _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
470 _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
471 _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
472 _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
473 _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
474 _a240 = (double) (long) (X##_f1 >> 32); \
475 _f240 = (double) (long) (Y##_f1 >> 32); \
1e145589
JM
476 _e240 *= _const[3]; \
477 _j240 *= _const[3]; \
478 _d240 *= _const[2]; \
479 _i240 *= _const[2]; \
480 _c240 *= _const[1]; \
481 _h240 *= _const[1]; \
482 _b240 *= _const[0]; \
483 _g240 *= _const[0]; \
484 _s240.d = _e240*_j240; \
485 _r240.d = _d240*_j240 + _e240*_i240; \
486 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
487 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
488 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
489 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
490 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
491 _l240.d = _a240*_g240 + _b240*_f240; \
492 _k240 = _a240*_f240; \
493 _r240.d += _s240.d; \
494 _q240.d += _r240.d; \
495 _p240.d += _q240.d; \
496 _o240.d += _p240.d; \
497 _n240.d += _o240.d; \
498 _m240.d += _n240.d; \
499 _l240.d += _m240.d; \
500 _k240 += _l240.d; \
501 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
502 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
503 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
504 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
505 _o240.d += _const[7]; \
506 _n240.d += _const[6]; \
507 _m240.d += _const[5]; \
508 _l240.d += _const[4]; \
509 if (_s240.d != 0.0) \
510 _y240 = 1; \
511 if (_r240.d != 0.0) \
512 _y240 = 1; \
513 if (_q240.d != 0.0) \
514 _y240 = 1; \
515 if (_p240.d != 0.0) \
516 _y240 = 1; \
51ca9e29 517 _t240 = (DItype) _k240; \
1e145589
JM
518 _u240 = _l240.i; \
519 _v240 = _m240.i; \
520 _w240 = _n240.i; \
521 _x240 = _o240.i; \
522 R##_f1 = ((_t240 << (128 - (wfracbits - 1))) \
523 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104))); \
524 R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
525 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
526 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
527 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
528 | _y240); \
529 resetfe; \
530 } \
531 while (0)
d876f532 532
c4fe3ea7 533/* Division algorithms: */
d876f532
UD
534
535#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
1e145589
JM
536 do \
537 { \
3a6e9887
JM
538 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
539 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
540 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
541 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
542 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
543 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
544 _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
51ca9e29 545 if (_FP_FRAC_GE_2 (X, Y)) \
1e145589 546 { \
3a6e9887
JM
547 _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
548 _FP_DIV_MEAT_2_udiv_n_f1 \
549 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
550 _FP_DIV_MEAT_2_udiv_n_f0 \
551 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
1e145589
JM
552 } \
553 else \
554 { \
555 R##_e--; \
3a6e9887
JM
556 _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
557 _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
558 _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
1e145589 559 } \
d876f532 560 \
1e145589 561 /* Normalize, i.e. make the most significant bit of the \
c4fe3ea7 562 denominator set. */ \
51ca9e29 563 _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
d876f532 564 \
3a6e9887
JM
565 udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
566 _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
567 Y##_f1); \
568 umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
569 R##_f1, Y##_f0); \
570 _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
571 if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
1e145589
JM
572 { \
573 R##_f1--; \
3a6e9887
JM
574 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
575 _FP_DIV_MEAT_2_udiv_r); \
576 if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
577 && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
578 _FP_DIV_MEAT_2_udiv_r)) \
1e145589
JM
579 { \
580 R##_f1--; \
3a6e9887
JM
581 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
582 _FP_DIV_MEAT_2_udiv_r); \
1e145589
JM
583 } \
584 } \
3a6e9887 585 _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
d876f532 586 \
3a6e9887 587 if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
1e145589
JM
588 { \
589 /* This is a special case, not an optimization \
3a6e9887
JM
590 (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
591 As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
592 R##_f0 can be either (UWtype)-1 or (UWtype)-2. But as we \
593 know what kind of bits it is (sticky, guard, round), \
594 we don't care. We also don't care what the reminder is, \
595 because the guard bit will be set anyway. -jj */ \
1e145589
JM
596 R##_f0 = -1; \
597 } \
598 else \
599 { \
3a6e9887
JM
600 udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
601 _FP_DIV_MEAT_2_udiv_r_f1, \
602 _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
603 umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
604 _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
605 _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
606 if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
607 _FP_DIV_MEAT_2_udiv_r)) \
1e145589
JM
608 { \
609 R##_f0--; \
3a6e9887
JM
610 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
611 _FP_DIV_MEAT_2_udiv_r); \
612 if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
613 && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
614 _FP_DIV_MEAT_2_udiv_r)) \
1e145589
JM
615 { \
616 R##_f0--; \
3a6e9887
JM
617 _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
618 _FP_DIV_MEAT_2_udiv_r); \
1e145589
JM
619 } \
620 } \
3a6e9887
JM
621 if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
622 _FP_DIV_MEAT_2_udiv_m)) \
1e145589
JM
623 R##_f0 |= _FP_WORK_STICKY; \
624 } \
625 } \
626 while (0)
d876f532
UD
627
628
c4fe3ea7
JM
629/* Square root algorithms:
630 We have just one right now, maybe Newton approximation
631 should be added for those machines where division is fast. */
9c84384c 632
1e145589
JM
633#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
634 do \
635 { \
636 while (q) \
637 { \
5c0508a3 638 T##_f1 = S##_f1 + (q); \
1e145589
JM
639 if (T##_f1 <= X##_f1) \
640 { \
5c0508a3 641 S##_f1 = T##_f1 + (q); \
1e145589 642 X##_f1 -= T##_f1; \
5c0508a3 643 R##_f1 += (q); \
1e145589 644 } \
51ca9e29 645 _FP_FRAC_SLL_2 (X, 1); \
5c0508a3 646 (q) >>= 1; \
1e145589 647 } \
5c0508a3
JM
648 (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
649 while ((q) != _FP_WORK_ROUND) \
1e145589 650 { \
5c0508a3 651 T##_f0 = S##_f0 + (q); \
1e145589
JM
652 T##_f1 = S##_f1; \
653 if (T##_f1 < X##_f1 \
654 || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
655 { \
5c0508a3 656 S##_f0 = T##_f0 + (q); \
1e145589 657 S##_f1 += (T##_f0 > S##_f0); \
51ca9e29 658 _FP_FRAC_DEC_2 (X, T); \
5c0508a3 659 R##_f0 += (q); \
1e145589 660 } \
51ca9e29 661 _FP_FRAC_SLL_2 (X, 1); \
5c0508a3 662 (q) >>= 1; \
1e145589
JM
663 } \
664 if (X##_f0 | X##_f1) \
665 { \
666 if (S##_f1 < X##_f1 \
667 || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
668 R##_f0 |= _FP_WORK_ROUND; \
669 R##_f0 |= _FP_WORK_STICKY; \
670 } \
671 } \
672 while (0)
d876f532
UD
673
674
c4fe3ea7
JM
675/* Assembly/disassembly for converting to/from integral types.
676 No shifting or overflow handled here. */
d876f532
UD
677
678#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
5c0508a3
JM
679 (void) (((rsize) <= _FP_W_TYPE_SIZE) \
680 ? ({ (r) = X##_f0; }) \
51ca9e29 681 : ({ \
5c0508a3
JM
682 (r) = X##_f1; \
683 (r) <<= _FP_W_TYPE_SIZE; \
684 (r) += X##_f0; \
51ca9e29 685 }))
d876f532 686
5c0508a3
JM
687#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
688 do \
689 { \
690 X##_f0 = (r); \
691 X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
692 ? 0 \
693 : (r) >> _FP_W_TYPE_SIZE); \
694 } \
1e145589 695 while (0)
d876f532 696
c4fe3ea7 697/* Convert FP values between word sizes. */
d876f532 698
fe0b1e85 699#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
d876f532 700
fe0b1e85 701#define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
37002cbc 702
51ca9e29 703#define _FP_FRAC_COPY_2_2(D, S) _FP_FRAC_COPY_2 (D, S)
a2f8be9c
JM
704
705#endif /* !SOFT_FP_OP_2_H */