]> git.ipfire.org Git - thirdparty/glibc.git/blame - soft-fp/op-4.h
Fix error_tail overflow in allocation calculation.
[thirdparty/glibc.git] / soft-fp / op-4.h
CommitLineData
d876f532
UD
1/* Software floating-point emulation.
2 Basic four-word fraction declaration and manipulation.
568035b7 3 Copyright (C) 1997-2013 Free Software Foundation, Inc.
d876f532
UD
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
d876f532 14
638a783c
RM
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
23
d876f532
UD
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 27 Lesser General Public License for more details.
d876f532 28
41bdb6e2 29 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
d876f532
UD
32
33#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]
34#define _FP_FRAC_COPY_4(D,S) \
35 (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
36 D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
37#define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I)
38#define _FP_FRAC_HIGH_4(X) (X##_f[3])
39#define _FP_FRAC_LOW_4(X) (X##_f[0])
40#define _FP_FRAC_WORD_4(X,w) (X##_f[w])
41
42#define _FP_FRAC_SLL_4(X,N) \
43 do { \
44 _FP_I_TYPE _up, _down, _skip, _i; \
45 _skip = (N) / _FP_W_TYPE_SIZE; \
46 _up = (N) % _FP_W_TYPE_SIZE; \
47 _down = _FP_W_TYPE_SIZE - _up; \
48 if (!_up) \
49 for (_i = 3; _i >= _skip; --_i) \
50 X##_f[_i] = X##_f[_i-_skip]; \
51 else \
52 { \
53 for (_i = 3; _i > _skip; --_i) \
54 X##_f[_i] = X##_f[_i-_skip] << _up \
55 | X##_f[_i-_skip-1] >> _down; \
56 X##_f[_i--] = X##_f[0] << _up; \
57 } \
58 for (; _i >= 0; --_i) \
59 X##_f[_i] = 0; \
60 } while (0)
61
62/* This one was broken too */
63#define _FP_FRAC_SRL_4(X,N) \
64 do { \
65 _FP_I_TYPE _up, _down, _skip, _i; \
66 _skip = (N) / _FP_W_TYPE_SIZE; \
67 _down = (N) % _FP_W_TYPE_SIZE; \
68 _up = _FP_W_TYPE_SIZE - _down; \
69 if (!_down) \
70 for (_i = 0; _i <= 3-_skip; ++_i) \
71 X##_f[_i] = X##_f[_i+_skip]; \
72 else \
73 { \
74 for (_i = 0; _i < 3-_skip; ++_i) \
75 X##_f[_i] = X##_f[_i+_skip] >> _down \
76 | X##_f[_i+_skip+1] << _up; \
77 X##_f[_i++] = X##_f[3] >> _down; \
78 } \
79 for (; _i < 4; ++_i) \
80 X##_f[_i] = 0; \
81 } while (0)
82
83
9c84384c 84/* Right shift with sticky-lsb.
d876f532
UD
85 * What this actually means is that we do a standard right-shift,
86 * but that if any of the bits that fall off the right hand side
87 * were one then we always set the LSbit.
88 */
fe0b1e85
RM
89#define _FP_FRAC_SRST_4(X,S,N,size) \
90 do { \
91 _FP_I_TYPE _up, _down, _skip, _i; \
92 _FP_W_TYPE _s; \
93 _skip = (N) / _FP_W_TYPE_SIZE; \
94 _down = (N) % _FP_W_TYPE_SIZE; \
95 _up = _FP_W_TYPE_SIZE - _down; \
96 for (_s = _i = 0; _i < _skip; ++_i) \
97 _s |= X##_f[_i]; \
98 if (!_down) \
99 for (_i = 0; _i <= 3-_skip; ++_i) \
100 X##_f[_i] = X##_f[_i+_skip]; \
101 else \
102 { \
103 _s |= X##_f[_i] << _up; \
104 for (_i = 0; _i < 3-_skip; ++_i) \
105 X##_f[_i] = X##_f[_i+_skip] >> _down \
106 | X##_f[_i+_skip+1] << _up; \
107 X##_f[_i++] = X##_f[3] >> _down; \
108 } \
109 for (; _i < 4; ++_i) \
110 X##_f[_i] = 0; \
111 S = (_s != 0); \
112 } while (0)
113
114#define _FP_FRAC_SRS_4(X,N,size) \
115 do { \
116 int _sticky; \
117 _FP_FRAC_SRST_4(X, _sticky, N, size); \
118 X##_f[0] |= _sticky; \
d876f532
UD
119 } while (0)
120
121#define _FP_FRAC_ADD_4(R,X,Y) \
122 __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
123 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
124 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
125
126#define _FP_FRAC_SUB_4(R,X,Y) \
127 __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
128 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
129 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
130
131#define _FP_FRAC_DEC_4(X,Y) \
132 __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
133 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
134
135#define _FP_FRAC_ADDI_4(X,I) \
136 __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
137
138#define _FP_ZEROFRAC_4 0,0,0,0
139#define _FP_MINFRAC_4 0,0,0,1
140#define _FP_MAXFRAC_4 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
141
142#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
143#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0)
144#define _FP_FRAC_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
77f01ab5
JM
145#define _FP_FRAC_HIGHBIT_DW_4(fs,X) \
146 (_FP_FRAC_HIGH_DW_##fs(X) & _FP_HIGHBIT_DW_##fs)
cf299341 147#define _FP_FRAC_CLEAR_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
d876f532
UD
148
149#define _FP_FRAC_EQ_4(X,Y) \
150 (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
151 && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
152
153#define _FP_FRAC_GT_4(X,Y) \
154 (X##_f[3] > Y##_f[3] || \
155 (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
156 (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
157 (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \
158 )) \
159 )) \
160 )
161
162#define _FP_FRAC_GE_4(X,Y) \
163 (X##_f[3] > Y##_f[3] || \
164 (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
165 (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
166 (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \
167 )) \
168 )) \
169 )
170
171
172#define _FP_FRAC_CLZ_4(R,X) \
173 do { \
174 if (X##_f[3]) \
175 { \
176 __FP_CLZ(R,X##_f[3]); \
177 } \
178 else if (X##_f[2]) \
179 { \
180 __FP_CLZ(R,X##_f[2]); \
181 R += _FP_W_TYPE_SIZE; \
182 } \
183 else if (X##_f[1]) \
184 { \
52e1b618 185 __FP_CLZ(R,X##_f[1]); \
d876f532
UD
186 R += _FP_W_TYPE_SIZE*2; \
187 } \
188 else \
189 { \
190 __FP_CLZ(R,X##_f[0]); \
191 R += _FP_W_TYPE_SIZE*3; \
192 } \
193 } while(0)
194
195
196#define _FP_UNPACK_RAW_4(fs, X, val) \
197 do { \
198 union _FP_UNION_##fs _flo; _flo.flt = (val); \
199 X##_f[0] = _flo.bits.frac0; \
200 X##_f[1] = _flo.bits.frac1; \
201 X##_f[2] = _flo.bits.frac2; \
202 X##_f[3] = _flo.bits.frac3; \
203 X##_e = _flo.bits.exp; \
204 X##_s = _flo.bits.sign; \
205 } while (0)
206
207#define _FP_UNPACK_RAW_4_P(fs, X, val) \
208 do { \
209 union _FP_UNION_##fs *_flo = \
210 (union _FP_UNION_##fs *)(val); \
211 \
212 X##_f[0] = _flo->bits.frac0; \
213 X##_f[1] = _flo->bits.frac1; \
214 X##_f[2] = _flo->bits.frac2; \
215 X##_f[3] = _flo->bits.frac3; \
216 X##_e = _flo->bits.exp; \
217 X##_s = _flo->bits.sign; \
218 } while (0)
219
220#define _FP_PACK_RAW_4(fs, val, X) \
221 do { \
222 union _FP_UNION_##fs _flo; \
223 _flo.bits.frac0 = X##_f[0]; \
224 _flo.bits.frac1 = X##_f[1]; \
225 _flo.bits.frac2 = X##_f[2]; \
226 _flo.bits.frac3 = X##_f[3]; \
227 _flo.bits.exp = X##_e; \
228 _flo.bits.sign = X##_s; \
229 (val) = _flo.flt; \
230 } while (0)
231
232#define _FP_PACK_RAW_4_P(fs, val, X) \
233 do { \
234 union _FP_UNION_##fs *_flo = \
235 (union _FP_UNION_##fs *)(val); \
236 \
237 _flo->bits.frac0 = X##_f[0]; \
238 _flo->bits.frac1 = X##_f[1]; \
239 _flo->bits.frac2 = X##_f[2]; \
240 _flo->bits.frac3 = X##_f[3]; \
241 _flo->bits.exp = X##_e; \
242 _flo->bits.sign = X##_s; \
243 } while (0)
244
245/*
246 * Multiplication algorithms:
247 */
248
249/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
250
77f01ab5 251#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
d876f532 252 do { \
77f01ab5 253 _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
d876f532
UD
254 _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f); \
255 \
77f01ab5 256 doit(_FP_FRAC_WORD_8(R,1), _FP_FRAC_WORD_8(R,0), X##_f[0], Y##_f[0]); \
d876f532
UD
257 doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]); \
258 doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]); \
259 doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]); \
260 doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]); \
261 doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]); \
77f01ab5
JM
262 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2), \
263 _FP_FRAC_WORD_8(R,1), 0,_b_f1,_b_f0, \
264 0,0,_FP_FRAC_WORD_8(R,1)); \
265 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2), \
266 _FP_FRAC_WORD_8(R,1), 0,_c_f1,_c_f0, \
267 _FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2), \
268 _FP_FRAC_WORD_8(R,1)); \
269 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
270 _FP_FRAC_WORD_8(R,2), 0,_d_f1,_d_f0, \
271 0,_FP_FRAC_WORD_8(R,3),_FP_FRAC_WORD_8(R,2)); \
272 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
273 _FP_FRAC_WORD_8(R,2), 0,_e_f1,_e_f0, \
274 _FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
275 _FP_FRAC_WORD_8(R,2)); \
276 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
277 _FP_FRAC_WORD_8(R,2), 0,_f_f1,_f_f0, \
278 _FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3), \
279 _FP_FRAC_WORD_8(R,2)); \
d876f532
UD
280 doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]); \
281 doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]); \
282 doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]); \
283 doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]); \
77f01ab5
JM
284 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
285 _FP_FRAC_WORD_8(R,3), 0,_b_f1,_b_f0, \
286 0,_FP_FRAC_WORD_8(R,4),_FP_FRAC_WORD_8(R,3)); \
287 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
288 _FP_FRAC_WORD_8(R,3), 0,_c_f1,_c_f0, \
289 _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
290 _FP_FRAC_WORD_8(R,3)); \
291 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
292 _FP_FRAC_WORD_8(R,3), 0,_d_f1,_d_f0, \
293 _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
294 _FP_FRAC_WORD_8(R,3)); \
295 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
296 _FP_FRAC_WORD_8(R,3), 0,_e_f1,_e_f0, \
297 _FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4), \
298 _FP_FRAC_WORD_8(R,3)); \
d876f532
UD
299 doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]); \
300 doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]); \
301 doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]); \
302 doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]); \
303 doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]); \
77f01ab5
JM
304 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
305 _FP_FRAC_WORD_8(R,4), 0,_b_f1,_b_f0, \
306 0,_FP_FRAC_WORD_8(R,5),_FP_FRAC_WORD_8(R,4)); \
307 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
308 _FP_FRAC_WORD_8(R,4), 0,_c_f1,_c_f0, \
309 _FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
310 _FP_FRAC_WORD_8(R,4)); \
311 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
312 _FP_FRAC_WORD_8(R,4), 0,_d_f1,_d_f0, \
313 _FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5), \
314 _FP_FRAC_WORD_8(R,4)); \
315 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6), \
316 _FP_FRAC_WORD_8(R,5), 0,_e_f1,_e_f0, \
317 0,_FP_FRAC_WORD_8(R,6),_FP_FRAC_WORD_8(R,5)); \
318 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6), \
319 _FP_FRAC_WORD_8(R,5), 0,_f_f1,_f_f0, \
320 _FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6), \
321 _FP_FRAC_WORD_8(R,5)); \
d876f532 322 doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]); \
77f01ab5 323 __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6), \
d876f532 324 _b_f1,_b_f0, \
77f01ab5
JM
325 _FP_FRAC_WORD_8(R,7),_FP_FRAC_WORD_8(R,6)); \
326 } while (0)
327
328#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
329 do { \
330 _FP_FRAC_DECL_8(_z); \
331 \
332 _FP_MUL_MEAT_DW_4_wide(wfracbits, _z, X, Y, doit); \
d876f532
UD
333 \
334 /* Normalize since we know where the msb of the multiplicands \
335 were (bit B), we know that the msb of the of the product is \
336 at either 2B or 2B-1. */ \
337 _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \
338 __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \
339 _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \
340 } while (0)
341
77f01ab5
JM
342#define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y) \
343 do { \
344 mpn_mul_n(R##_f, _x_f, _y_f, 4); \
345 } while (0)
346
d876f532
UD
347#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
348 do { \
349 _FP_FRAC_DECL_8(_z); \
350 \
77f01ab5 351 _FP_MUL_MEAT_DW_4_gmp(wfracbits, _z, X, Y); \
d876f532
UD
352 \
353 /* Normalize since we know where the msb of the multiplicands \
354 were (bit B), we know that the msb of the of the product is \
355 at either 2B or 2B-1. */ \
356 _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \
357 __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \
358 _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \
359 } while (0)
360
361/*
362 * Helper utility for _FP_DIV_MEAT_4_udiv:
363 * pppp = m * nnn
364 */
365#define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0) \
366 do { \
367 UWtype _t; \
368 umul_ppmm(p1,p0,m,n0); \
369 umul_ppmm(p2,_t,m,n1); \
370 __FP_FRAC_ADDI_2(p2,p1,_t); \
371 umul_ppmm(p3,_t,m,n2); \
372 __FP_FRAC_ADDI_2(p3,p2,_t); \
373 } while (0)
374
375/*
376 * Division algorithms:
377 */
378
379#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
380 do { \
381 int _i; \
382 _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m); \
383 _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4); \
8399acaf 384 if (_FP_FRAC_GE_4(X, Y)) \
d876f532
UD
385 { \
386 _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
387 _FP_FRAC_SRL_4(X, 1); \
388 } \
389 else \
390 R##_e--; \
391 \
392 /* Normalize, i.e. make the most significant bit of the \
393 denominator set. */ \
394 _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs); \
395 \
396 for (_i = 3; ; _i--) \
397 { \
398 if (X##_f[3] == Y##_f[3]) \
399 { \
400 /* This is a special case, not an optimization \
401 (X##_f[3]/Y##_f[3] would not fit into UWtype). \
402 As X## is guaranteed to be < Y, R##_f[_i] can be either \
403 (UWtype)-1 or (UWtype)-2. */ \
404 R##_f[_i] = -1; \
405 if (!_i) \
406 break; \
407 __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
408 Y##_f[2], Y##_f[1], Y##_f[0], 0, \
409 X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \
410 _FP_FRAC_SUB_4(X, Y, X); \
411 if (X##_f[3] > Y##_f[3]) \
412 { \
413 R##_f[_i] = -2; \
414 _FP_FRAC_ADD_4(X, Y, X); \
415 } \
416 } \
417 else \
418 { \
419 udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
420 umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0], \
421 R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \
422 X##_f[2] = X##_f[1]; \
423 X##_f[1] = X##_f[0]; \
424 X##_f[0] = _n_f[_i]; \
425 if (_FP_FRAC_GT_4(_m, X)) \
426 { \
427 R##_f[_i]--; \
428 _FP_FRAC_ADD_4(X, Y, X); \
429 if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X)) \
430 { \
431 R##_f[_i]--; \
432 _FP_FRAC_ADD_4(X, Y, X); \
433 } \
434 } \
435 _FP_FRAC_DEC_4(X, _m); \
436 if (!_i) \
437 { \
438 if (!_FP_FRAC_EQ_4(X, _m)) \
439 R##_f[0] |= _FP_WORK_STICKY; \
440 break; \
441 } \
442 } \
443 } \
444 } while (0)
445
446
447/*
448 * Square root algorithms:
449 * We have just one right now, maybe Newton approximation
450 * should be added for those machines where division is fast.
451 */
9c84384c 452
d876f532
UD
453#define _FP_SQRT_MEAT_4(R, S, T, X, q) \
454 do { \
455 while (q) \
456 { \
457 T##_f[3] = S##_f[3] + q; \
458 if (T##_f[3] <= X##_f[3]) \
459 { \
460 S##_f[3] = T##_f[3] + q; \
461 X##_f[3] -= T##_f[3]; \
462 R##_f[3] += q; \
463 } \
464 _FP_FRAC_SLL_4(X, 1); \
465 q >>= 1; \
466 } \
467 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
468 while (q) \
469 { \
470 T##_f[2] = S##_f[2] + q; \
471 T##_f[3] = S##_f[3]; \
472 if (T##_f[3] < X##_f[3] || \
473 (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
474 { \
475 S##_f[2] = T##_f[2] + q; \
476 S##_f[3] += (T##_f[2] > S##_f[2]); \
477 __FP_FRAC_DEC_2(X##_f[3], X##_f[2], \
478 T##_f[3], T##_f[2]); \
479 R##_f[2] += q; \
480 } \
481 _FP_FRAC_SLL_4(X, 1); \
482 q >>= 1; \
483 } \
484 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
485 while (q) \
486 { \
487 T##_f[1] = S##_f[1] + q; \
488 T##_f[2] = S##_f[2]; \
489 T##_f[3] = S##_f[3]; \
490 if (T##_f[3] < X##_f[3] || \
491 (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] || \
492 (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1])))) \
493 { \
494 S##_f[1] = T##_f[1] + q; \
495 S##_f[2] += (T##_f[1] > S##_f[1]); \
496 S##_f[3] += (T##_f[2] > S##_f[2]); \
497 __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1], \
350635a5 498 T##_f[3], T##_f[2], T##_f[1]); \
d876f532
UD
499 R##_f[1] += q; \
500 } \
501 _FP_FRAC_SLL_4(X, 1); \
502 q >>= 1; \
503 } \
504 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
505 while (q != _FP_WORK_ROUND) \
506 { \
507 T##_f[0] = S##_f[0] + q; \
508 T##_f[1] = S##_f[1]; \
509 T##_f[2] = S##_f[2]; \
510 T##_f[3] = S##_f[3]; \
511 if (_FP_FRAC_GE_4(X,T)) \
512 { \
513 S##_f[0] = T##_f[0] + q; \
514 S##_f[1] += (T##_f[0] > S##_f[0]); \
515 S##_f[2] += (T##_f[1] > S##_f[1]); \
516 S##_f[3] += (T##_f[2] > S##_f[2]); \
517 _FP_FRAC_DEC_4(X, T); \
518 R##_f[0] += q; \
519 } \
520 _FP_FRAC_SLL_4(X, 1); \
521 q >>= 1; \
522 } \
523 if (!_FP_FRAC_ZEROP_4(X)) \
524 { \
525 if (_FP_FRAC_GT_4(X,S)) \
526 R##_f[0] |= _FP_WORK_ROUND; \
527 R##_f[0] |= _FP_WORK_STICKY; \
528 } \
529 } while (0)
530
531
532/*
9c84384c 533 * Internals
d876f532
UD
534 */
535
536#define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \
537 (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
538
539#ifndef __FP_FRAC_ADD_3
71b4dea7 540# define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
1a8aaf91 541 do { \
695c378f 542 _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2; \
1a8aaf91 543 r0 = x0 + y0; \
695c378f 544 __FP_FRAC_ADD_3_c1 = r0 < x0; \
1a8aaf91 545 r1 = x1 + y1; \
695c378f
JM
546 __FP_FRAC_ADD_3_c2 = r1 < x1; \
547 r1 += __FP_FRAC_ADD_3_c1; \
548 __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1; \
549 r2 = x2 + y2 + __FP_FRAC_ADD_3_c2; \
1a8aaf91 550 } while (0)
d876f532
UD
551#endif
552
553#ifndef __FP_FRAC_ADD_4
71b4dea7 554# define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
1a8aaf91 555 do { \
fe0b1e85 556 _FP_W_TYPE _c1, _c2, _c3; \
1a8aaf91
UD
557 r0 = x0 + y0; \
558 _c1 = r0 < x0; \
559 r1 = x1 + y1; \
560 _c2 = r1 < x1; \
561 r1 += _c1; \
562 _c2 |= r1 < _c1; \
563 r2 = x2 + y2; \
564 _c3 = r2 < x2; \
565 r2 += _c2; \
566 _c3 |= r2 < _c2; \
567 r3 = x3 + y3 + _c3; \
568 } while (0)
d876f532
UD
569#endif
570
571#ifndef __FP_FRAC_SUB_3
71b4dea7 572# define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
1a8aaf91 573 do { \
fe0b1e85 574 _FP_W_TYPE _c1, _c2; \
1a8aaf91
UD
575 r0 = x0 - y0; \
576 _c1 = r0 > x0; \
577 r1 = x1 - y1; \
578 _c2 = r1 > x1; \
579 r1 -= _c1; \
d68f5d2c 580 _c2 |= _c1 && (y1 == x1); \
1a8aaf91
UD
581 r2 = x2 - y2 - _c2; \
582 } while (0)
d876f532
UD
583#endif
584
585#ifndef __FP_FRAC_SUB_4
71b4dea7 586# define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
1a8aaf91 587 do { \
fe0b1e85 588 _FP_W_TYPE _c1, _c2, _c3; \
1a8aaf91
UD
589 r0 = x0 - y0; \
590 _c1 = r0 > x0; \
591 r1 = x1 - y1; \
592 _c2 = r1 > x1; \
593 r1 -= _c1; \
d68f5d2c 594 _c2 |= _c1 && (y1 == x1); \
1a8aaf91
UD
595 r2 = x2 - y2; \
596 _c3 = r2 > x2; \
597 r2 -= _c2; \
d68f5d2c 598 _c3 |= _c2 && (y2 == x2); \
1a8aaf91
UD
599 r3 = x3 - y3 - _c3; \
600 } while (0)
d876f532
UD
601#endif
602
603#ifndef __FP_FRAC_DEC_3
71b4dea7 604# define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) \
d876f532 605 do { \
1a8aaf91
UD
606 UWtype _t0, _t1, _t2; \
607 _t0 = x0, _t1 = x1, _t2 = x2; \
608 __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \
d876f532
UD
609 } while (0)
610#endif
611
612#ifndef __FP_FRAC_DEC_4
71b4dea7 613# define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) \
d876f532 614 do { \
1a8aaf91
UD
615 UWtype _t0, _t1, _t2, _t3; \
616 _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \
617 __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0); \
d876f532
UD
618 } while (0)
619#endif
620
621#ifndef __FP_FRAC_ADDI_4
71b4dea7 622# define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
d876f532
UD
623 do { \
624 UWtype _t; \
625 _t = ((x0 += i) < i); \
626 x1 += _t; _t = (x1 < _t); \
627 x2 += _t; _t = (x2 < _t); \
628 x3 += _t; \
629 } while (0)
630#endif
631
632/* Convert FP values between word sizes. This appears to be more
633 * complicated than I'd have expected it to be, so these might be
634 * wrong... These macros are in any case somewhat bogus because they
9c84384c 635 * use information about what various FRAC_n variables look like
d876f532 636 * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
9c84384c 637 * the ones in op-2.h and op-1.h.
d876f532 638 */
fe0b1e85 639#define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0])
d876f532 640
fe0b1e85
RM
641#define _FP_FRAC_COPY_2_4(D, S) \
642do { \
643 D##_f0 = S##_f[0]; \
644 D##_f1 = S##_f[1]; \
645} while (0)
d876f532 646
9c84384c 647/* Assembly/disassembly for converting to/from integral types.
d876f532
UD
648 * No shifting or overflow handled here.
649 */
650/* Put the FP value X into r, which is an integer of size rsize. */
651#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
652 do { \
653 if (rsize <= _FP_W_TYPE_SIZE) \
654 r = X##_f[0]; \
655 else if (rsize <= 2*_FP_W_TYPE_SIZE) \
656 { \
657 r = X##_f[1]; \
658 r <<= _FP_W_TYPE_SIZE; \
659 r += X##_f[0]; \
660 } \
661 else \
662 { \
663 /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
664 /* and int == 4words as a single case. */ \
665 r = X##_f[3]; \
666 r <<= _FP_W_TYPE_SIZE; \
667 r += X##_f[2]; \
668 r <<= _FP_W_TYPE_SIZE; \
669 r += X##_f[1]; \
670 r <<= _FP_W_TYPE_SIZE; \
671 r += X##_f[0]; \
672 } \
673 } while (0)
674
675/* "No disassemble Number Five!" */
676/* move an integer of size rsize into X's fractional part. We rely on
677 * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
678 * having to mask the values we store into it.
679 */
680#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
681 do { \
682 X##_f[0] = r; \
683 X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
684 X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
685 X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
cb8f9562 686 } while (0)
d876f532 687
fe0b1e85
RM
688#define _FP_FRAC_COPY_4_1(D, S) \
689do { \
690 D##_f[0] = S##_f; \
691 D##_f[1] = D##_f[2] = D##_f[3] = 0; \
692} while (0)
693
694#define _FP_FRAC_COPY_4_2(D, S) \
695do { \
696 D##_f[0] = S##_f0; \
697 D##_f[1] = S##_f1; \
698 D##_f[2] = D##_f[3] = 0; \
699} while (0)
37002cbc
JJ
700
701#define _FP_FRAC_COPY_4_4(D,S) _FP_FRAC_COPY_4(D,S)