]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgcc/config/libbid/bid_internal.h
Makefile.in (dfp-filenames): Replace decimal_globals...
[thirdparty/gcc.git] / libgcc / config / libbid / bid_internal.h
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
8 version.
9
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
18
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA. */
28
29 #ifndef __BIDECIMAL_H
30 #define __BIDECIMAL_H
31
32 #include "bid_conf.h"
33 #include "bid_functions.h"
34
35 #define __BID_INLINE__ static __inline
36
37 /*********************************************************************
38 *
39 * Logical Shift Macros
40 *
41 *********************************************************************/
42
// Logical right shift of a 128-bit value: Q = A >> k.
// Precondition: 0 < (k) < 64.  k == 0 would evaluate (A).w[1] << 64,
// which is undefined behavior in C; for k >= 64 use __shr_128_long.
// Q may alias A: w[0] is fully formed before w[1] is overwritten.
// Fix: (k) is now parenthesized so expression arguments (e.g. a+b)
// shift by the intended amount.
#define __shr_128(Q, A, k)                        \
{                                                 \
  (Q).w[0] = (A).w[0] >> (k);                     \
  (Q).w[0] |= (A).w[1] << (64 - (k));             \
  (Q).w[1] = (A).w[1] >> (k);                     \
}
49
// Logical right shift of a 128-bit value for shift amounts up to 127:
// Q = A >> k.  For k >= 64 the low word comes entirely from the shifted
// high word.  Precondition: (k) != 0 (k == 0 would shift by 64 -> UB);
// k == 64 is handled correctly by the else branch.
// Fix: (k) parenthesized in the k < 64 branch (the else branch already
// was), so expression arguments compute the intended shift counts.
#define __shr_128_long(Q, A, k)                   \
{                                                 \
  if((k)<64) {                                    \
    (Q).w[0] = (A).w[0] >> (k);                   \
    (Q).w[0] |= (A).w[1] << (64 - (k));           \
    (Q).w[1] = (A).w[1] >> (k);                   \
  }                                               \
  else {                                          \
    (Q).w[0] = (A).w[1]>>((k)-64);                \
    (Q).w[1] = 0;                                 \
  }                                               \
}

// Logical left shift of a 128-bit value for shift amounts up to 127:
// Q = A << k.  Precondition: (k) != 0; k == 64 is handled by the else
// branch.  Same parenthesization fix as __shr_128_long.
#define __shl_128_long(Q, A, k)                   \
{                                                 \
  if((k)<64) {                                    \
    (Q).w[1] = (A).w[1] << (k);                   \
    (Q).w[1] |= (A).w[0] >> (64 - (k));           \
    (Q).w[0] = (A).w[0] << (k);                   \
  }                                               \
  else {                                          \
    (Q).w[1] = (A).w[0]<<((k)-64);                \
    (Q).w[0] = 0;                                 \
  }                                               \
}
75
// Extract the low 64-bit word of a 128-bit value.
#define __low_64(Q) (Q).w[0]
/*********************************************************************
*
* String Macros
*
*********************************************************************/
// ASCII-only lowercase conversion: maps 'A'..'Z' to 'a'..'z' and returns
// any other value unchanged.  The unsigned-char cast of (x)-'A' turns the
// two-sided range test into a single unsigned comparison.
// NOTE: (x) is evaluated up to three times -- do not pass an argument
// with side effects.
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
83 /*********************************************************************
84 *
85 * Compare Macros
86 *
87 *********************************************************************/
// greater than
// return 0 if A<=B
// non-zero if A>B
// Fix: (A) and (B) are parenthesized, consistent with __test_equal_128,
// so expression arguments such as *p work correctly.
#define __unsigned_compare_gt_128(A, B) \
  (((A).w[1]>(B).w[1]) || (((A).w[1]==(B).w[1]) && ((A).w[0]>(B).w[0])))
// greater-or-equal
#define __unsigned_compare_ge_128(A, B) \
  (((A).w[1]>(B).w[1]) || (((A).w[1]==(B).w[1]) && ((A).w[0]>=(B).w[0])))
// equality
#define __test_equal_128(A, B) (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten exponent range
// Given an estimate bin_expon for the position of the most significant
// bit of the 128-bit value P, set bp to the exact MSB position:
// bp is incremented by one exactly when P >= 2^(bin_expon+1).
// The three branches test against 2^(bp+1) in the low word, the high
// word, or at the 64-bit boundary (bp == 63 or 64, where the threshold
// comparison reduces to "high word nonzero").
// NOTE(review): assumes bin_expon is at most 1 below the true MSB
// position (and that P < 2^64 when bin_expon < 63) -- confirm against
// callers.
#define __tight_bin_range_128(bp, P, bin_expon) \
{ \
  UINT64 M; \
  M = 1; \
  (bp) = (bin_expon); \
  if((bp)<63) { \
    M <<= ((bp)+1); \
    if((P).w[0] >= M) (bp)++; } \
  else if((bp)>64) { \
    M <<= ((bp)+1-64); \
    if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
      (bp)++; } \
  else if((P).w[1]) (bp)++; \
}
112 /*********************************************************************
113 *
114 * Add/Subtract Macros
115 *
116 *********************************************************************/
// add 64-bit value to 128-bit: R128 = A128 + B64.
// The low-word sum is staged in a local so R128 may alias A128;
// unsigned wrap-around of the low word signals a carry into the high word.
#define __add_128_64(R128, A128, B64)             \
{                                                 \
  UINT64 Slo = (A128).w[0] + (B64);               \
  (R128).w[1] = (A128).w[1]                       \
              + ((Slo < (B64)) ? 1 : 0);          \
  (R128).w[0] = Slo;                              \
}
// subtract 64-bit value from 128-bit: R128 = A128 - B64.
// The borrow is detected before the low word is written, so R128 may
// alias A128.
#define __sub_128_64(R128, A128, B64)             \
{                                                 \
  UINT64 Hi = (A128).w[1]                         \
            - (((A128).w[0] < (B64)) ? 1 : 0);    \
  (R128).w[0] = (A128).w[0] - (B64);              \
  (R128).w[1] = Hi;                               \
}
// add 128-bit value to 128-bit: R128 = A128 + B128.
// assume no carry-out of bit 127.
// The sum is built in a local so R128 may alias either operand.
#define __add_128_128(R128, A128, B128)           \
{                                                 \
  UINT128 Tmp;                                    \
  Tmp.w[0] = (A128).w[0] + (B128).w[0];           \
  Tmp.w[1] = (A128).w[1] + (B128).w[1]            \
           + ((Tmp.w[0] < (B128).w[0]) ? 1 : 0);  \
  (R128).w[0] = Tmp.w[0];                         \
  (R128).w[1] = Tmp.w[1];                         \
}
// subtract 128-bit value from 128-bit: R128 = A128 - B128.
// assume no borrow-out of bit 127.
// The difference is built in a local so R128 may alias either operand.
#define __sub_128_128(R128, A128, B128)           \
{                                                 \
  UINT128 Tmp;                                    \
  Tmp.w[0] = (A128).w[0] - (B128).w[0];           \
  Tmp.w[1] = (A128).w[1] - (B128).w[1]            \
           - (((A128).w[0] < (B128).w[0]) ? 1 : 0); \
  (R128).w[0] = Tmp.w[0];                         \
  (R128).w[1] = Tmp.w[1];                         \
}
// S = X + Y, CY = carry out (0 or 1).
// X is saved in X1 first so that S may alias X.
#define __add_carry_out(S, CY, X, Y) \
{ \
  UINT64 X1=X; \
  S = X + Y; \
  CY = (S<X1) ? 1 : 0; \
}
// S = X + Y + CI with CI in {0,1}; CY = carry out (0 or 1).
// A carry occurs when X+CI wraps (X1<CI) or when X1+Y wraps (S<X1);
// both cannot happen at once, so CY never exceeds 1.
#define __add_carry_in_out(S, CY, X, Y, CI) \
{ \
  UINT64 X1; \
  X1 = X + CI; \
  S = X1 + Y; \
  CY = ((S<X1) || (X1<CI)) ? 1 : 0; \
}
// S = X - Y, CY = borrow out (0 or 1).
// X is saved in X1 first so that S may alias X.
#define __sub_borrow_out(S, CY, X, Y) \
{ \
  UINT64 X1=X; \
  S = X - Y; \
  CY = (S>X1) ? 1 : 0; \
}
// S = X - Y - CI with CI in {0,1}; CY = borrow out (0 or 1).
// A borrow occurs when X-CI wraps (X1>X0) or when X1-Y wraps (S>X1).
#define __sub_borrow_in_out(S, CY, X, Y, CI) \
{ \
  UINT64 X1, X0=X; \
  X1 = X - CI; \
  S = X1 - Y; \
  CY = ((S>X1) || (X1>X0)) ? 1 : 0; \
}
// increment C128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
// (0x0001ed09bead87c0_378d8e6400000000 is 10^34,
//  0x0000314dc6448d93_38c15b0a00000000 is 10^33)
#define INCREMENT(C_128, exp) \
{ \
  C_128.w[0]++; \
  if (C_128.w[0] == 0) C_128.w[1]++; \
  if (C_128.w[1] == 0x0001ed09bead87c0ull && \
      C_128.w[0] == 0x378d8e6400000000ull) { \
    exp++; \
    C_128.w[1] = 0x0000314dc6448d93ull; \
    C_128.w[0] = 0x38c15b0a00000000ull; \
  } \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
// (the hex constants below are 10^33 - 1 and 10^34 - 1)
#define DECREMENT(C_128, exp) \
{ \
  C_128.w[0]--; \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
  if (C_128.w[1] == 0x0000314dc6448d93ull && \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) { \
    exp--; \
    C_128.w[1] = 0x0001ed09bead87c0ull; \
    C_128.w[0] = 0x378d8e63ffffffffull; \
  } \
}
212
213 /*********************************************************************
214 *
215 * Multiply Macros
216 *
217 *********************************************************************/
// Low 64 bits of a 64x64-bit product.
#define __mul_64x64_to_64(P64, CX, CY)      (P64) = (CX) * (CY)
/***************************************
* Signed, Full 64x64-bit Multiply
***************************************/
// Full 128-bit signed product via the unsigned multiply plus the
// standard two's-complement correction: subtract CY from the high word
// when CX < 0, and CX when CY < 0 (SX/SY are all-ones masks built from
// the sign bits).
// NOTE(review): ((SINT64)x)>>63 relies on arithmetic right shift of
// negative values, which is implementation-defined in C (GCC defines it).
#define __imul_64x64_to_128(P, CX, CY) \
{ \
  UINT64 SX, SY; \
  __mul_64x64_to_128(P, CX, CY); \
\
  SX = ((SINT64)(CX))>>63; \
  SY = ((SINT64)(CY))>>63; \
  SX &= CY; SY &= CX; \
\
  (P).w[1] = (P).w[1] - SX - SY; \
}
/***************************************
* Signed, Full 64x128-bit Multiply
***************************************/
// Signed 64 x 128 -> 192-bit product: Ph holds the top 64 bits, Ql the
// low 128.  The middle word of the low partial product is sign-extended
// (QM.w[1]) before being added to the high partial product.
#define __imul_64x128_full(Ph, Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2, QM; \
\
  __imul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __imul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
  (Ql).w[0] = ALBL.w[0]; \
  QM.w[0] = ALBL.w[1]; \
  QM.w[1] = ((SINT64)ALBL.w[1])>>63; \
  __add_128_128(QM2, ALBH, QM); \
  (Ql).w[1] = QM2.w[0]; \
  Ph = QM2.w[1]; \
}
/*****************************************************
* Unsigned Multiply Macros
*****************************************************/
// get full 64x64bit product
//
// Schoolbook multiply: each operand is split into 32-bit halves, the
// four 32x32 partial products are formed, and the two middle terms are
// recombined with explicit carry propagation into the high word.
#define __mul_64x64_to_128(P, CX, CY) \
{ \
  UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
\
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM>>32); \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
\
  (P).w[1] = PH + (PM>>32); \
  (P).w[0] = (PM<<32)+(UINT32)PL; \
}
// get full 64x64bit product
// Note:
// This macro is used for CX < 2^61, CY < 2^61
// (the restricted range lets both middle terms and the carry be summed
// into a single 64-bit accumulator PM without overflow).
//
#define __mul_64x64_to_128_fast(P, CX, CY) \
{ \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
\
  PM = CXH*CYL; \
  PL = CXL*CYL; \
  PH = CXH*CYH; \
  PM += CXL*CYH; \
  PM += (PL>>32); \
\
  (P).w[1] = PH + (PM>>32); \
  (P).w[0] = (PM<<32)+(UINT32)PL; \
}
// Full 128-bit square of CX; used for CX < 2^60.
// The single cross product CXH*CXL is doubled (PM += PM) instead of
// being computed twice.
#define __sqr64_fast(P, CX) \
{ \
  UINT64 CXH, CXL, PL, PH, PM; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
\
  PM = CXH*CXL; \
  PL = CXL*CXL; \
  PH = CXH*CXH; \
  PM += PM; \
  PM += (PL>>32); \
\
  (P).w[1] = PH + (PM>>32); \
  (P).w[0] = (PM<<32)+(UINT32)PL; \
}
// get HIGH 64 bits of the 64x64-bit product (the low half is discarded).
// Note:
// This implementation is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_64_high_fast(P, CX, CY) \
{ \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
\
  PM = CXH*CYL; \
  PL = CXL*CYL; \
  PH = CXH*CYH; \
  PM += CXL*CYH; \
  PM += (PL>>32); \
\
  (P) = PH + (PM>>32); \
}
// get full 64x64bit product
// (same algorithm as __mul_64x64_to_128 above; kept as a separate name)
//
#define __mul_64x64_to_128_full(P, CX, CY) \
{ \
  UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
\
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM>>32); \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
\
  (P).w[1] = PH + (PM>>32); \
  (P).w[0] = (PM<<32)+(UINT32)PL; \
}
// High 128 bits of the 128x128-bit product (low 128 bits discarded).
// NOTE(review): ALBH+AHBL can mathematically carry out of 128 bits and
// __add_128_128 assumes no carry-out; callers presumably guarantee
// operand ranges where this cannot occur -- confirm.
#define __mul_128x128_high(Q, A, B) \
{ \
  UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
\
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]); \
\
  __add_128_128(QM, ALBH, AHBL); \
  __add_128_64(QM2, QM, ALBL.w[1]); \
  __add_128_64((Q), AHBH, QM2.w[1]); \
}
// Full 256-bit product split into Qh (high 128) and Ql (low 128).
// Same carry-out caveat as __mul_128x128_high.
#define __mul_128x128_full(Qh, Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
\
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]); \
\
  __add_128_128(QM, ALBH, AHBL); \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, QM, ALBL.w[1]); \
  __add_128_64((Qh), AHBH, QM2.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
}
// Low 128 bits of the 128x128-bit product: cross terms only contribute
// their low 64 bits, so plain 64-bit multiplies suffice for QM64.
#define __mul_128x128_low(Ql, A, B) \
{ \
  UINT128 ALBL; \
  UINT64 QM64; \
\
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1]; \
\
  (Ql).w[0] = ALBL.w[0]; \
  (Ql).w[1] = QM64 + ALBL.w[1]; \
}
// Low 128 bits of the 64x128-bit product A * B.
#define __mul_64x128_low(Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
}
// Full 192-bit product of 64-bit A and 128-bit B:
// Ph holds the top 64 bits, Ql the low 128.
#define __mul_64x128_full(Ph, Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
\
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
  Ph = QM2.w[1]; \
}
// Full 192-bit product of 64-bit A and 128-bit B, returned in the three
// words of Q (Q.w[0] low ... Q.w[2] high).
#define __mul_64x128_to_192(Q, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
\
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
  (Q).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Q).w[1] = QM2.w[0]; \
  (Q).w[2] = QM2.w[1]; \
}
// Identical to __mul_64x128_to_192 (name without the second underscore
// is kept for existing callers).
#define __mul_64x128_to192(Q, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
\
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
  (Q).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Q).w[1] = QM2.w[0]; \
  (Q).w[2] = QM2.w[1]; \
}
// Full 256-bit product of two 128-bit operands: two 64x128 partial
// products combined with an explicit carry chain.
#define __mul_128x128_to_256(P256, A, B) \
{ \
  UINT128 Qll, Qlh; \
  UINT64 Phl, Phh, CY1, CY2; \
\
  __mul_64x128_full(Phl, Qll, A.w[0], B); \
  __mul_64x128_full(Phh, Qlh, A.w[1], B); \
  (P256).w[0] = Qll.w[0]; \
  __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]); \
  __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
  (P256).w[3] = Phh + CY2; \
}
//
// For better performance, will check A.w[1] against 0,
// but not B.w[1]
// Use this macro accordingly
// (when A fits in 64 bits the second 64x128 multiply and the whole
//  carry chain are skipped)
#define __mul_128x128_to_256_check_A(P256, A, B) \
{ \
  UINT128 Qll, Qlh; \
  UINT64 Phl, Phh, CY1, CY2; \
\
  __mul_64x128_full(Phl, Qll, A.w[0], B); \
  (P256).w[0] = Qll.w[0]; \
  if(A.w[1]) { \
    __mul_64x128_full(Phh, Qlh, A.w[1], B); \
    __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]); \
    __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
    (P256).w[3] = Phh + CY2; } \
  else { \
    (P256).w[1] = Qll.w[1]; \
    (P256).w[2] = Phl; \
    (P256).w[3] = 0; } \
}
// 64 x 192 -> 256-bit product: three 64x64 partial products combined
// with a single carry chain.
#define __mul_64x192_to_256(lP, lA, lB) \
{ \
  UINT128 lP0,lP1,lP2; \
  UINT64 lC; \
  __mul_64x64_to_128(lP0, lA, (lB).w[0]); \
  __mul_64x64_to_128(lP1, lA, (lB).w[1]); \
  __mul_64x64_to_128(lP2, lA, (lB).w[2]); \
  (lP).w[0] = lP0.w[0]; \
  __add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]); \
  __add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
  (lP).w[3] = lP2.w[1] + lC; \
}
// 64 x 256 -> 320-bit product.
#define __mul_64x256_to_320(P, A, B) \
{ \
  UINT128 lP0,lP1,lP2,lP3; \
  UINT64 lC; \
  __mul_64x64_to_128(lP0, A, (B).w[0]); \
  __mul_64x64_to_128(lP1, A, (B).w[1]); \
  __mul_64x64_to_128(lP2, A, (B).w[2]); \
  __mul_64x64_to_128(lP3, A, (B).w[3]); \
  (P).w[0] = lP0.w[0]; \
  __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]); \
  __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
  __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
  (P).w[4] = lP3.w[1] + lC; \
}
// 192 x 192 -> 384-bit product: three 64x192 rows, each accumulated
// into P one 64-bit word higher than the last.
#define __mul_192x192_to_384(P, A, B) \
{ \
  UINT256 P0,P1,P2; \
  UINT64 CY; \
  __mul_64x192_to_256(P0, (A).w[0], B); \
  __mul_64x192_to_256(P1, (A).w[1], B); \
  __mul_64x192_to_256(P2, (A).w[2], B); \
  (P).w[0] = P0.w[0]; \
  __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]); \
  __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
  __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
  (P).w[4] = P1.w[3] + CY; \
  __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]); \
  __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
  __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
  (P).w[5] = P2.w[3] + CY; \
}
// 64 x 320 -> 384-bit product.
#define __mul_64x320_to_384(P, A, B) \
{ \
  UINT128 lP0,lP1,lP2,lP3,lP4; \
  UINT64 lC; \
  __mul_64x64_to_128(lP0, A, (B).w[0]); \
  __mul_64x64_to_128(lP1, A, (B).w[1]); \
  __mul_64x64_to_128(lP2, A, (B).w[2]); \
  __mul_64x64_to_128(lP3, A, (B).w[3]); \
  __mul_64x64_to_128(lP4, A, (B).w[4]); \
  (P).w[0] = lP0.w[0]; \
  __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]); \
  __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
  __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
  __add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC); \
  (P).w[5] = lP4.w[1] + lC; \
}
// A*A
// Full 128x128-bit product
// The single cross product Qlh = A.w[0]*A.w[1] is doubled in place;
// the bit shifted out of Qlh.w[1] is carried into Qhh first.
#define __sqr128_to_256(P256, A) \
{ \
  UINT128 Qll, Qlh, Qhh; \
  UINT64 TMP_C1, TMP_C2; \
\
  __mul_64x64_to_128(Qhh, A.w[1], A.w[1]); \
  __mul_64x64_to_128(Qlh, A.w[0], A.w[1]); \
  Qhh.w[1] += (Qlh.w[1]>>63); \
  Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63); \
  Qlh.w[0] += Qlh.w[0]; \
  __mul_64x64_to_128(Qll, A.w[0], A.w[0]); \
\
  __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]); \
  (P256).w[0] = Qll.w[0]; \
  __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
  (P256).w[3] = Qhh.w[1]+TMP_C2; \
}
// Full 256-bit product of two 128-bit operands, returned as two
// 128-bit halves (PQh high, PQl low).
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B) \
{ \
  UINT128 Qll, Qlh; \
  UINT64 Phl, Phh, C1, C2; \
\
  __mul_64x128_full(Phl, Qll, A.w[0], B); \
  __mul_64x128_full(Phh, Qlh, A.w[1], B); \
  (PQl).w[0] = Qll.w[0]; \
  __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]); \
  __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1); \
  (PQh).w[1] = Phh + C2; \
}
// 256 x 256 -> 512-bit product: four 64x256 rows accumulated into P,
// each shifted up by one 64-bit word.
#define __mul_256x256_to_512(P, A, B) \
{ \
  UINT512 P0,P1,P2,P3; \
  UINT64 CY; \
  __mul_64x256_to_320(P0, (A).w[0], B); \
  __mul_64x256_to_320(P1, (A).w[1], B); \
  __mul_64x256_to_320(P2, (A).w[2], B); \
  __mul_64x256_to_320(P3, (A).w[3], B); \
  (P).w[0] = P0.w[0]; \
  __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]); \
  __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
  __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
  __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
  (P).w[5] = P1.w[4] + CY; \
  __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]); \
  __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
  __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
  __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY); \
  (P).w[6] = P2.w[4] + CY; \
  __add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]); \
  __add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY); \
  __add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY); \
  __add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY); \
  (P).w[7] = P3.w[4] + CY; \
}
// 192 x 256 -> 448-bit product (three 64x256 rows).
#define __mul_192x256_to_448(P, A, B) \
{ \
  UINT512 P0,P1,P2; \
  UINT64 CY; \
  __mul_64x256_to_320(P0, (A).w[0], B); \
  __mul_64x256_to_320(P1, (A).w[1], B); \
  __mul_64x256_to_320(P2, (A).w[2], B); \
  (P).w[0] = P0.w[0]; \
  __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]); \
  __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
  __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
  __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
  (P).w[5] = P1.w[4] + CY; \
  __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]); \
  __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
  __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
  __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY); \
  (P).w[6] = P2.w[4] + CY; \
}
// 320 x 320 -> 640-bit product, built from the 256x256 product of the
// low parts plus the two 64x256 cross rows and the 64x64 top product.
// NOTE: P0..P3 are declared UINT512 but used as shorter temporaries
// (P3 only uses w[0..1], P0 is a 320-bit accumulator).
#define __mul_320x320_to_640(P, A, B) \
{ \
  UINT512 P0,P1,P2,P3; \
  UINT64 CY; \
  __mul_256x256_to_512((P), (A), B); \
  __mul_64x256_to_320(P1, (A).w[4], B); \
  __mul_64x256_to_320(P2, (B).w[4], A); \
  __mul_64x64_to_128(P3, (A).w[4], (B).w[4]); \
  __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]); \
  __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
  __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
  __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
  __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
  P3.w[1] += CY; \
  __add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]); \
  __add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY); \
  __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY); \
  __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY); \
  __add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY); \
  (P).w[9] = P3.w[1] + CY; \
}
// 384 x 384 -> 768-bit product (same scheme one word wider).
#define __mul_384x384_to_768(P, A, B) \
{ \
  UINT512 P0,P1,P2,P3; \
  UINT64 CY; \
  __mul_320x320_to_640((P), (A), B); \
  __mul_64x320_to_384(P1, (A).w[5], B); \
  __mul_64x320_to_384(P2, (B).w[5], A); \
  __mul_64x64_to_128(P3, (A).w[5], (B).w[5]); \
  __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]); \
  __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
  __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
  __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
  __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
  __add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY); \
  P3.w[1] += CY; \
  __add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]); \
  __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY); \
  __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY); \
  __add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY); \
  __add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY); \
  __add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY); \
  (P).w[11] = P3.w[1] + CY; \
}
// Low 128 bits of 64x128 product where only the LOW 64 bits of
// A*(B.w[1]) are needed.
#define __mul_64x128_short(Ql, A, B) \
{ \
  UINT64 ALBH_L; \
\
  __mul_64x64_to_64(ALBH_L, (A),(B).w[1]); \
  __mul_64x64_to_128((Ql), (A), (B).w[0]); \
\
  (Ql).w[1] += ALBH_L; \
}
// D = 10 * _TMP, computed as 2*_TMP + 8*_TMP with shifts.
// NOTE: _TMP appears unparenthesized -- pass a plain lvalue, not an
// expression.  D must not alias _TMP (D is written before the final add
// reads are complete only inside __add_128_128, which uses a temporary,
// but _TMP2/_TMP8 are built from _TMP first, so a distinct D is safest).
#define __scale128_10(D,_TMP) \
{ \
  UINT128 _TMP2,_TMP8; \
  _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63); \
  _TMP2.w[0] = _TMP.w[0]<<1; \
  _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61); \
  _TMP8.w[0] = _TMP.w[0]<<3; \
  __add_128_128(D, _TMP2, _TMP8); \
}
// 64x64-bit product
// (same schoolbook algorithm as __mul_64x64_to_128; separate name kept
// for machine-specific override points)
#define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
{ \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2; \
  CXH = (CX64) >> 32; \
  CXL = (UINT32)(CX64); \
  CYH = (CY64) >> 32; \
  CYL = (UINT32)(CY64); \
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM>>32); \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
  (P128).w[1] = PH + (PM>>32); \
  (P128).w[0] = (PM<<32)+(UINT32)PL; \
}
// 64x64-bit product, HIGH 64 bits only.
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)  \
{ \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2; \
  CXH = (CX64) >> 32; \
  CXL = (UINT32)(CX64); \
  CYH = (CY64) >> 32; \
  CYL = (UINT32)(CY64); \
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM>>32); \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
  P64 = PH + (PM>>32); \
}
// Low 128 bits of (128-bit B128) * (64-bit A64): the high cross term
// contributes only its low 64 bits.
#define __mul_128x64_to_128(Q128, A64, B128) \
{ \
  UINT64 ALBH_L; \
  ALBH_L = (A64) * (B128).w[1]; \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
  (Q128).w[1] += ALBH_L; \
}
// might simplify by calculating just QM2.w[0]
// Low 128 bits of the 64x128-bit product A * B.
#define __mul_64x128_to_128(Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
}
713 /*********************************************************************
714 *
715 * BID Pack/Unpack Macros
716 *
717 *********************************************************************/
718 /////////////////////////////////////////
719 // BID64 definitions
720 ////////////////////////////////////////
721 #define DECIMAL_MAX_EXPON_64 767
722 #define DECIMAL_EXPONENT_BIAS 398
723 #define MAX_FORMAT_DIGITS 16
724 /////////////////////////////////////////
725 // BID128 definitions
726 ////////////////////////////////////////
727 #define DECIMAL_MAX_EXPON_128 12287
728 #define DECIMAL_EXPONENT_BIAS_128 6176
729 #define MAX_FORMAT_DIGITS_128 34
730 /////////////////////////////////////////
731 // BID32 definitions
732 ////////////////////////////////////////
733 #define DECIMAL_MAX_EXPON_32 191
734 #define DECIMAL_EXPONENT_BIAS_32 101
735 #define MAX_FORMAT_DIGITS_32 7
736 ////////////////////////////////////////
737 // Constant Definitions
738 ///////////////////////////////////////
739 #define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
740 #define INFINITY_MASK64 0x7800000000000000ull
741 #define SINFINITY_MASK64 0xf800000000000000ull
742 #define SSNAN_MASK64 0xfc00000000000000ull
743 #define NAN_MASK64 0x7c00000000000000ull
744 #define SNAN_MASK64 0x7e00000000000000ull
745 #define QUIET_MASK64 0xfdffffffffffffffull
746 #define LARGE_COEFF_MASK64 0x0007ffffffffffffull
747 #define LARGE_COEFF_HIGH_BIT64 0x0020000000000000ull
748 #define SMALL_COEFF_MASK64 0x001fffffffffffffull
749 #define EXPONENT_MASK64 0x3ff
750 #define EXPONENT_SHIFT_LARGE64 51
751 #define EXPONENT_SHIFT_SMALL64 53
752 #define LARGEST_BID64 0x77fb86f26fc0ffffull
753 #define SMALLEST_BID64 0xf7fb86f26fc0ffffull
754 #define SMALL_COEFF_MASK128 0x0001ffffffffffffull
755 #define LARGE_COEFF_MASK128 0x00007fffffffffffull
756 #define EXPONENT_MASK128 0x3fff
757 #define LARGEST_BID128_HIGH 0x5fffed09bead87c0ull
758 #define LARGEST_BID128_LOW 0x378d8e63ffffffffull
759 #define SPECIAL_ENCODING_MASK32 0x60000000ul
760 #define INFINITY_MASK32 0x78000000ul
761 #define LARGE_COEFF_MASK32 0x007ffffful
762 #define LARGE_COEFF_HIGH_BIT32 0x00800000ul
763 #define SMALL_COEFF_MASK32 0x001ffffful
764 #define EXPONENT_MASK32 0xff
765 #define LARGEST_BID32 0x77f8967f
766 #define NAN_MASK32 0x7c000000
767 #define SNAN_MASK32 0x7e000000
768 #define MASK_BINARY_EXPONENT 0x7ff0000000000000ull
769 #define BINARY_EXPONENT_BIAS 0x3ff
770 #define UPPER_EXPON_LIMIT 51
771 // data needed for BID pack/unpack macros
772 extern UINT64 round_const_table[][19];
773 extern UINT128 reciprocals10_128[];
774 extern int recip_scale[];
775 extern UINT128 power10_table_128[];
776 extern int estimate_decimal_digits[];
777 extern int estimate_bin_expon[];
778 extern UINT64 power10_index_binexp[];
779 extern int short_recip_scale[];
780 extern UINT64 reciprocals10_64[];
781 extern UINT128 power10_index_binexp_128[];
782 extern UINT128 round_const_table_128[][36];
783
784
785 //////////////////////////////////////////////
786 // Status Flag Handling
787 /////////////////////////////////////////////
// OR the given exception flag(s) into the status word pointed to by fpsc.
// Fix: (status) parenthesized so low-precedence expression arguments
// combine correctly with |=.
#define __set_status_flags(fpsc, status)  *(fpsc) |= (status)
// Nonzero if the inexact flag is already raised in *fpsc.
#define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)
790
// Decompose a BID64 value x into sign, biased exponent, and coefficient.
// Returns the coefficient for finite canonical values (0 both for a zero
// coefficient and for NaN/infinity, where it also writes the payload
// into *pcoefficient_x and sets *pexponent_x = 0).
// Non-canonical large-form coefficients (>= 10^16) decode as 0.
__BID_INLINE__ UINT64
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
	      UINT64 * pcoefficient_x, UINT64 x) {
  UINT64 tmp, coeff;

  *psign_x = x & 0x8000000000000000ull;

  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
    // special encodings
    // coefficient: 51 stored bits with the implicit high bit 0b100
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;

    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      *pexponent_x = 0;
      // keep sign/NaN bits plus payload; clear payload if non-canonical
      *pcoefficient_x = x & 0xfe03ffffffffffffull;
      if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
	*pcoefficient_x = x & 0xfe00000000000000ull;
      if ((x & NAN_MASK64) == INFINITY_MASK64)
	*pcoefficient_x = x & SINFINITY_MASK64;
      return 0;	// NaN or Infinity
    }
    // check for non-canonical values
    if (coeff >= 10000000000000000ull)
      coeff = 0;
    *pcoefficient_x = coeff;
    // get exponent
    tmp = x >> EXPONENT_SHIFT_LARGE64;
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
    return coeff;
  }
  // small-coefficient form: exponent
  tmp = x >> EXPONENT_SHIFT_SMALL64;
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
  // coefficient
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);

  return *pcoefficient_x;
}
829
//
//   BID64 pack macro (general form)
//
// Assemble a BID64 value from sign, biased exponent, and coefficient,
// handling coefficient rounding overflow (coeff == 10^16), exponent
// underflow (rounding digits away via the reciprocals10_128 tables) and
// overflow (result becomes infinity or the largest finite value
// depending on rounding mode), then choosing the small- or
// large-coefficient encoding.
__BID_INLINE__ UINT64
get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  UINT128 Stemp, Q_low;
  UINT64 QH, r, mask, C64, remainder_h, CY, carry;
  int extra_digits, amount, amount2;
  unsigned status;

  // coefficient rounded up past 10^16 - 1: renormalize to 10^15
  if (coeff > 9999999999999999ull) {
    expon++;
    coeff = 1000000000000000ull;
  }
  // check for possible underflow/overflow
  // (the unsigned compare catches both expon < 0 and expon >= 768)
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	// entire coefficient is shifted out: directed rounding may
	// still produce the smallest subnormal magnitude
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // for negative values swap ROUNDING_DOWN <-> ROUNDING_UP so the
      // rounding tables can be indexed by magnitude
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, coeff,
			 reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
	  // (round-half-even correction: undo the +0.5 rounding constant)

	  // get remainder: low 'amount' bits of QH
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0]))) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }
    // overflow path: try to absorb the excess exponent into the
    // coefficient (coeff*10 per step, as (coeff<<3)+(coeff<<1))
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
1013
1014
1015
1016
//
// No overflow/underflow checking
//
// Assemble a BID64 value from sign, biased exponent, and coefficient.
// Caller guarantees the exponent is in range; only the choice between
// the small- and large-coefficient encodings (and the coeff == 10^16
// renormalization) is handled here.
__BID_INLINE__ UINT64
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
1054
1055
1056 //
1057 // no underflow checking
1058 //
// Pack a BID64 value, checking for overflow only (no underflow check).
// sgn   - sign bit already positioned in bit 63
// expon - biased decimal exponent
// coeff - decimal coefficient (at most 10^16 after rounding)
// rmode - rounding direction used to pick the overflow result
// fpsc  - floating-point status flags, updated on overflow
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
			 unsigned *fpsc) {
  UINT64 r, mask;

  // 3*256 is the number of biased exponent values in BID64; anything at
  // or above (or negative, via the unsigned cast) needs attention.
  if (((unsigned) expon) >= 3 * 256 - 1) {
    // coeff == 10^16 at the top exponent: renormalize to 10^15, expon+1
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      // try to absorb the excess exponent into the coefficient by
      // multiplying by 10 (<<3 + <<1) while the coefficient has room
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
	expon--;
	coeff = (coeff << 3) + (coeff << 1);
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
	// overflow: result depends on rounding direction
	r = sgn | INFINITY_MASK64;
	switch (rmode) {
	case ROUNDING_DOWN:
	  // +inf is replaced by +MAXFLOAT when rounding down
	  if (!sgn)
	    r = LARGEST_BID64;
	  break;
	case ROUNDING_TO_ZERO:
	  r = sgn | LARGEST_BID64;
	  break;
	case ROUNDING_UP:
	  // round up: -inf is replaced by the most negative finite value
	  if (sgn)
	    r = SMALLEST_BID64;
	}
	return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special (large-coefficient) format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits (they are implicit in this format)
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
1130
1131
1132 //
1133 // No overflow/underflow checking
1134 // or checking for coefficients equal to 10^16 (after rounding)
1135 //
1136 __BID_INLINE__ UINT64
1137 very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1138 UINT64 r, mask;
1139
1140 mask = 1;
1141 mask <<= EXPONENT_SHIFT_SMALL64;
1142
1143 // check whether coefficient fits in 10*5+3 bits
1144 if (coeff < mask) {
1145 r = expon;
1146 r <<= EXPONENT_SHIFT_SMALL64;
1147 r |= (coeff | sgn);
1148 return r;
1149 }
1150 // special format
1151 r = expon;
1152 r <<= EXPONENT_SHIFT_LARGE64;
1153 r |= (sgn | SPECIAL_ENCODING_MASK64);
1154 // add coeff, without leading bits
1155 mask = (mask >> 2) - 1;
1156 coeff &= mask;
1157 r |= coeff;
1158
1159 return r;
1160 }
1161
1162 //
1163 // No overflow/underflow checking or checking for coefficients above 2^53
1164 //
1165 __BID_INLINE__ UINT64
1166 very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
1167 // no UF/OF
1168 UINT64 r;
1169
1170 r = expon;
1171 r <<= EXPONENT_SHIFT_SMALL64;
1172 r |= (coeff | sgn);
1173 return r;
1174 }
1175
1176
1177 //
1178 // This pack macro is used when underflow is known to occur
1179 //
// Pack a BID64 result when underflow is known to occur: the coefficient
// must be rounded at a position inside the format (expon < 1).
// sgn   - sign bit, already in bit 63
// expon - (negative or small) biased exponent
// coeff - coefficient before scaling
// R     - sticky bit: nonzero if digits were already discarded upstream
// rmode - rounding mode
// fpsc  - status flags; underflow/inexact are raised as appropriate
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
	      unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow: result rounds to zero (or the smallest subnormal,
  // depending on direction)
  if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    // directed rounding away from zero yields the minimum denormal
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff (makes room for the sticky bit below)
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  // for a negative operand, mirror ROUNDING_DOWN <-> ROUNDING_UP so the
  // rest of the code can round the magnitude only
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  // fold the sticky information into the low bit
  if (R)
    coeff |= 1;
  // get number of digits to be shifted out
  extra_digits = 1 - expon;
  // add the rounding constant (e.g. 0.5*10^extra_digits for nearest)
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits via multiplication by
  // a precomputed scaled reciprocal of 10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
		     reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is
      // exactly .5; if so, apply round-half-even by decrementing C64

      // get remainder: low 'amount' bits of QH
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;		// all-ones mask
      remainder_h >>= amount2;
      remainder_h = remainder_h & QH;

      // remainder is exactly .5 iff the shifted-out high bits are zero
      // and the low product is below the reciprocal (i.e. no extra
      // fractional contribution)
      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0]))) {
	C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    // determine whether the rounding step was exact; only an inexact
    // tiny result raises underflow (per IEEE 754-2008)
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0 (the rounding constant added
      // exactly .5, so an exact result leaves a remainder of exactly .5)
      if (remainder_h == 0x8000000000000000ull
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
		       reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			  reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
	  (((UINT64) 1) << amount))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | C64;

}
1301
1302
1303
1304 //
// This pack macro does not check for coefficients above 2^53
1306 //
// Pack a BID64 with full underflow/overflow handling, for coefficients
// known to be below 2^53 (so the large-coefficient layout with implicit
// leading bits can still be needed, but coeff itself fits in 53 bits).
// sgn/expon/coeff - sign bit, biased exponent, coefficient
// rmode           - rounding mode, used on UF/OF paths
// fpsc            - status flags
__BID_INLINE__ UINT64
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
			  int rmode, unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // check for possible underflow/overflow (3*256 biased exponent values;
  // the unsigned cast also catches negative exponents)
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow: result is tiny; see get_BID64_UF for the same scheme
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	// directed rounding away from zero gives the minimum denormal
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // mirror directed roundings for a negative operand so the code
      // below rounds the magnitude only
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get number of digits to be shifted out
      extra_digits = -expon;
      // add the mode-dependent rounding constant
      C128.w[0] = coeff + round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits by multiplying
      // with a precomputed scaled reciprocal of 10^extra_digits
      __mul_64x128_full (QH, Q_low, C128.w[0],
			 reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits
	  // is exactly .5; if so, round half to even by decrementing

	  // get remainder: low 'amount' bits of QH
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;	// all-ones mask
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0]))) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	// decide whether the rounding was exact; only an inexact tiny
	// result raises underflow
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0 (rounding constant added
	  // exactly .5, so exactness shows as a remainder of .5)
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }

    // possible overflow: absorb exponent excess into the coefficient
    // by multiplying by 10 while there is room
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow: result depends on rounding direction
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    } else {
      // the scaling above may have pushed the coefficient into the
      // large-coefficient ("special") layout
      mask = 1;
      mask <<= EXPONENT_SHIFT_SMALL64;
      if (coeff >= mask) {
	r = expon;
	r <<= EXPONENT_SHIFT_LARGE64;
	r |= (sgn | SPECIAL_ENCODING_MASK64);
	// add coeff, without leading bits (implicit in this format)
	mask = (mask >> 2) - 1;
	coeff &= mask;
	r |= coeff;
	return r;
      }
    }
  }

  // common case: small-coefficient layout
  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);

  return r;
}
1473
1474
1475 /*****************************************************************************
1476 *
1477 * BID128 pack/unpack macros
1478 *
1479 *****************************************************************************/
1480
1481 //
1482 // Macro for handling BID128 underflow
1483 // sticky bit given as additional argument
1484 //
// Handle BID128 underflow, with an explicit sticky bit.
// pres            - output: packed BID128 result
// sgn             - sign bit in bit 63 of the high word
// expon           - (negative) biased exponent
// CQ              - 128-bit coefficient
// R               - sticky bit: nonzero if digits were already discarded
// prounding_mode  - rounding mode (read only)
// fpsc            - status flags
// Returns pres.
__BID_INLINE__ UINT128 *
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
		   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs; if all digits round away, the result is +/-0 (or the
  // minimum denormal for directed rounding away from zero)
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10, computed as CQ*2 + CQ*8 (makes room for the sticky bit)
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder (sticky bit) in the lowest bit
  if (R)
    CQ.w[0] |= 1;

  // number of digits to shift out (one extra because of the *10 above)
  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // mirror directed roundings for a negative operand so the code below
  // rounds the magnitude only
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // CQ / 10^ed2 via multiplication by a scaled reciprocal of 10^ed2,
  // then a right shift by recip_scale[ed2]
  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5;
      // if so, round half to even by decrementing

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    // decide exactness of the rounding; only an inexact tiny result
    // raises underflow
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0 (the rounding constant added
      // exactly .5, so an exact value leaves a remainder of exactly .5)
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  // pack: denormals use exponent 0, so only sign and coefficient remain
  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
1625
1626
1627 //
1628 // Macro for handling BID128 underflow
1629 //
// Handle BID128 underflow (no external sticky bit; compare
// handle_UF_128_rem, which additionally folds in a remainder).
// pres/sgn/expon/CQ/prounding_mode/fpsc as in handle_UF_128_rem.
// Returns pres.
__BID_INLINE__ UINT128 *
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
	       unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs; if all digits round away, the result is +/-0 (or the
  // minimum denormal for directed rounding away from zero)
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  // number of digits to shift out
  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // mirror directed roundings for a negative operand so the code below
  // rounds the magnitude only
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // CQ / 10^ed2 via multiplication by a scaled reciprocal of 10^ed2,
  // then a right shift by recip_scale[ed2]
  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5;
      // if so, round half to even by decrementing

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    // decide exactness of the rounding; only an inexact tiny result
    // raises underflow
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0 (the rounding constant added
      // exactly .5, so an exact value leaves a remainder of exactly .5)
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  // pack: denormals use exponent 0, so only sign and coefficient remain
  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
1761
1762
1763
1764 //
1765 // BID128 unpack, input passed by value
1766 //
// Unpack a BID128 passed by value into sign, exponent and coefficient.
// psign_x        - output: sign bit in bit 63
// pexponent_x    - output: biased exponent (0 for NaN/Inf)
// pcoefficient_x - output: coefficient (canonicalized; 0 for
//                  non-canonical finite inputs)
// x              - the BID128 operand
// Returns 0 for NaN, Infinity, or a zero/non-canonical coefficient;
// nonzero (an OR of the coefficient words) otherwise.
__BID_INLINE__ UINT64
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
		     UINT128 * pcoefficient_x, UINT128 x) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings (steering bits 11): large-coefficient form,
  // Infinity, or NaN
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // large-coefficient form is always non-canonical in BID128:
      // treat the coefficient as zero, but still extract the exponent
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (x.w[1]) >> 47;	// exponent sits 2 bits lower in this form
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // Infinity or NaN: canonicalize the payload
    // 10^33 is the upper bound for a canonical NaN payload
    T33 = power10_table_128[33];

    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
    {
      // keep only sign + special bits; zero the payload
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
      pcoefficient_x->w[0] = 0;
    } else
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
      // Infinity: coefficient is sign+infinity bits only
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34 is the exclusive upper bound for a canonical coefficient
  T34 = power10_table_128[34];
  // check for non-canonical values: they represent zero
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
1827
1828
1829 //
// BID128 unpack, input passed by reference
1831 //
// Unpack a BID128 passed by reference into sign, exponent and
// coefficient; same contract as unpack_BID128_value, except that for
// NaN the payload is canonicalized by masking only (the sign/special
// bits of the high word are preserved).
// Returns 0 for NaN, Infinity, or a zero/non-canonical coefficient;
// nonzero otherwise.
__BID_INLINE__ UINT64
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
	       UINT128 * pcoefficient_x, UINT128 * px) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (px->w[1]) & 0x8000000000000000ull;

  // special encodings (steering bits 11): large-coefficient form,
  // Infinity, or NaN
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // large-coefficient form is always non-canonical in BID128:
      // treat the coefficient as zero, but still extract the exponent
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (px->w[1]) >> 47;	// exponent sits 2 bits lower in this form
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // Infinity or NaN: canonicalize the payload
    // 10^33 is the upper bound for a canonical NaN payload
    T33 = power10_table_128[33];
    coeff.w[0] = px->w[0];
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = px->w[0];
    pcoefficient_x->w[1] = px->w[1];
    if (__unsigned_compare_ge_128 (coeff, T33)) { // non-canonical payload
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
      pcoefficient_x->w[0] = 0;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = px->w[0];
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;

  // 10^34 is the exclusive upper bound for a canonical coefficient
  T34 = power10_table_128[34];
  // check for non-canonical values: they represent zero
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (px->w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
1881
1882 //
1883 // Pack macro checks for overflow, but not underflow
1884 //
// Pack a BID128, checking for overflow but not underflow.
// pres           - output: packed BID128
// sgn            - sign bit in bit 63 of the high word
// expon          - biased exponent
// coeff          - 128-bit coefficient
// prounding_mode - rounding mode (selects the overflow result)
// fpsc           - status flags
// Returns pres.
__BID_INLINE__ UINT128 *
get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
			 UINT128 coeff, unsigned *prounding_mode,
			 unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {

    // try to absorb the exponent excess into the coefficient by
    // repeatedly multiplying it by 10 while it stays below 10^33
    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	// coeff *= 10, as coeff*8 + coeff*2 with manual carry
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)
	  coeff.w[1]++;

	expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF: deliver Inf or the largest finite value per rounding mode
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
							 &&
							 *prounding_mode
							 ==
							 ROUNDING_DOWN))
      {
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
	pres->w[1] = sgn | INFINITY_MASK64;
	pres->w[0] = 0;
      }
      return pres;
    }
  }

  // normal pack: sign | exponent<<49 | coefficient
  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
1943
1944
1945 //
1946 // No overflow/underflow checks
1947 // No checking for coefficient == 10^34 (rounding artifact)
1948 //
1949 __BID_INLINE__ UINT128 *
1950 get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
1951 UINT128 coeff) {
1952 UINT64 tmp;
1953
1954 pres->w[0] = coeff.w[0];
1955 tmp = expon;
1956 tmp <<= 49;
1957 pres->w[1] = sgn | tmp | coeff.w[1];
1958
1959 return pres;
1960 }
1961
1962 //
1963 // No overflow/underflow checks
1964 //
1965 __BID_INLINE__ UINT128 *
1966 get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
1967 UINT64 tmp;
1968
1969 // coeff==10^34?
1970 if (coeff.w[1] == 0x0001ed09bead87c0ull
1971 && coeff.w[0] == 0x378d8e6400000000ull) {
1972 expon++;
1973 // set coefficient to 10^33
1974 coeff.w[1] = 0x0000314dc6448d93ull;
1975 coeff.w[0] = 0x38c15b0a00000000ull;
1976 }
1977
1978 pres->w[0] = coeff.w[0];
1979 tmp = expon;
1980 tmp <<= 49;
1981 pres->w[1] = sgn | tmp | coeff.w[1];
1982
1983 return pres;
1984 }
1985
1986 //
1987 // General BID128 pack macro
1988 //
// General BID128 pack: normalizes coeff == 10^34, then handles
// underflow (delegated to handle_UF_128) and overflow before packing.
// pres/sgn/expon/coeff/prounding_mode/fpsc as in get_BID128_very_fast_OF.
// Returns pres.
__BID_INLINE__ UINT128 *
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
	    unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  // coeff==10^34? (rounding artifact: renormalize to 10^33, expon+1)
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0) {
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
			    fpsc);
    }

    // try to absorb the exponent excess into the coefficient by
    // repeatedly multiplying it by 10 while it stays below 10^33
    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	// coeff *= 10, as coeff*8 + coeff*2 with manual carry
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)
	  coeff.w[1]++;

	expon--;
      }
    }
    if (expon > DECIMAL_MAX_EXPON_128) {
      // a zero coefficient cannot overflow: clamp to the max exponent
      if (!(coeff.w[1] | coeff.w[0])) {
	pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
	pres->w[0] = 0;
	return pres;
      }
      // OF: deliver Inf or the largest finite value per rounding mode
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
							 &&
							 *prounding_mode
							 ==
							 ROUNDING_DOWN))
      {
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
	pres->w[1] = sgn | INFINITY_MASK64;
	pres->w[0] = 0;
      }
      return pres;
    }
  }

  // normal pack: sign | exponent<<49 | coefficient
  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
2065
2066
2067 //
2068 // Macro used for conversions from string
2069 // (no additional arguments given for rounding mode, status flags)
2070 //
// BID128 pack used by string conversion: no rounding mode or status
// flag arguments are provided, so rounding defaults to nearest-even
// (rmode = 0) and any status from the underflow path is discarded.
// Returns pres.
__BID_INLINE__ UINT128 *
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT128 D2, D8;
  UINT64 tmp;
  // status is a write-only sink for handle_UF_128's flag output
  unsigned rmode = 0, status;

  // coeff==10^34? (rounding artifact: renormalize to 10^33, expon+1)
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF (unsigned cast catches negative exponents too)
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    // try to absorb the exponent excess into the coefficient by
    // multiplying by 10 while it stays below 10^33
    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      while (expon > DECIMAL_MAX_EXPON_128 &&
	     (coeff.w[1] < power10_table_128[33].w[1] ||
	      (coeff.w[1] == power10_table_128[33].w[1]
	       && coeff.w[0] < power10_table_128[33].w[0]))) {
	// coeff *= 10 computed as coeff*2 + coeff*8
	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
	D2.w[0] = coeff.w[0] << 1;
	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
	D8.w[0] = coeff.w[0] << 3;

	__add_128_128 (coeff, D2, D8);
	expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      // zero coefficient cannot overflow: clamp to the max exponent
      expon = DECIMAL_MAX_EXPON_128;

    if (expon > DECIMAL_MAX_EXPON_128) {
      // overflow: Inf by default; rmode is always 0 here, so the
      // directed-rounding cases below are effectively dead but kept
      // for symmetry with the other pack macros
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn) {
	  pres->w[1] = LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      case ROUNDING_TO_ZERO:
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn) {
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      }

      return pres;
    }
  }

  // normal pack: sign | exponent<<49 | coefficient
  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
2143
2144
2145
2146 /*****************************************************************************
2147 *
2148 * BID32 pack/unpack macros
2149 *
2150 *****************************************************************************/
2151
2152
// Unpack a BID32 into sign, exponent and coefficient.
// psign_x        - output: sign bit in bit 31
// pexponent_x    - output: biased exponent (0 for NaN/Inf)
// pcoefficient_x - output: coefficient (canonicalized)
// x              - the BID32 operand
// Returns 0 for NaN or Infinity, 1 for the large-coefficient format,
// and the coefficient itself (0 iff zero) for the small format.
__BID_INLINE__ UINT32
unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
	      UINT32 * pcoefficient_x, UINT32 x) {
  UINT32 tmp;

  *psign_x = x & 0x80000000;

  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
    // special encodings (steering bits 11)
    if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
      // NaN or Infinity: canonicalize the payload
      *pcoefficient_x = x & 0xfe0fffff;
      if ((x & 0x000fffff) >= 1000000)	// non-canonical payload -> 0
	*pcoefficient_x = x & 0xfe000000;
      if ((x & NAN_MASK32) == INFINITY_MASK32)	// Infinity
	*pcoefficient_x = x & 0xf8000000;
      *pexponent_x = 0;
      return 0;	// NaN or Infinity
    }
    // coefficient: stored bits plus the implicit leading "100"
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
    // check for non-canonical value (>= 10^7): represents zero
    if (*pcoefficient_x >= 10000000)
      *pcoefficient_x = 0;
    // get exponent (2 bits lower than in the small format)
    tmp = x >> 21;
    *pexponent_x = tmp & EXPONENT_MASK32;
    return 1;
  }
  // small-coefficient format
  // exponent
  tmp = x >> 23;
  *pexponent_x = tmp & EXPONENT_MASK32;
  // coefficient
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);

  return *pcoefficient_x;
}
2189
//
//   General pack macro for BID32
//
// Assemble a BID32 value from sign (sgn, already positioned in bit 31),
// biased exponent (expon) and coefficient (coeff), applying rounding mode
// rmode and setting status flags in *fpsc where configured.  Handles a
// coefficient that rounded up to 10^7, exponent underflow (extra digits
// are shifted out with correctly-rounded division by 10^k), and exponent
// overflow (result is +/-Inf or the largest finite value, by mode).
__BID_INLINE__ UINT32
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  UINT128 Q;
  UINT64 C64, remainder_h, carry, Stemp;
  UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  // coefficient rounded up to 10^7: divide by 10 and bump the exponent
  if (coeff > 9999999ull) {
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
	// result is tiny: every digit would be shifted out
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	// directed rounding away from zero: smallest denormal magnitude
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x80000001;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1;
#endif
#endif
	// result is 0
	return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // negative operand: swap ROUNDING_DOWN and ROUNDING_UP
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif

      extra_digits = -expon;
      // pre-add the rounding constant for this mode and digit count
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = short_recip_scale[extra_digits];

      C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
	  // (if so, decrement the odd quotient: round-half-even)

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;	// all-ones, then keep the low 'amount' bits
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & Q.w[1];

	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = Q.w[1] << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp, carry, Q.w[0],
			   reciprocals10_64[extra_digits]);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | (UINT32) C64;
    }

    // exponent too large: absorb it into the coefficient while possible
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);	// coeff *= 10
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID32;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID32;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((UINT32) coeff | sgn);
    return r;
  }
  // special format

  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((UINT32) coeff) & mask);

  return r;
}
2356
2357
2358
2359 //
2360 // no overflow/underflow checks
2361 //
2362 __BID_INLINE__ UINT32
2363 very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
2364 UINT32 r, mask;
2365
2366 mask = 1 << 23;
2367
2368 // check whether coefficient fits in 10*2+3 bits
2369 if (coeff < mask) {
2370 r = expon;
2371 r <<= 23;
2372 r |= (coeff | sgn);
2373 return r;
2374 }
2375 // special format
2376 r = expon;
2377 r <<= 21;
2378 r |= (sgn | SPECIAL_ENCODING_MASK32);
2379 // add coeff, without leading bits
2380 mask = (1 << 21) - 1;
2381 coeff &= mask;
2382 r |= coeff;
2383
2384 return r;
2385 }
2386
2387
2388
2389 /*************************************************************
2390 *
2391 *************************************************************/
// 384-bit unsigned integer: six 64-bit limbs, w[0] least significant
// (matching the UINT128 convention used by the shift macros above).
typedef
ALIGN (16)
struct {
  UINT64 w[6];
} UINT384;
// 512-bit unsigned integer: eight 64-bit limbs, w[0] least significant.
typedef ALIGN (16)
struct {
  UINT64 w[8];
} UINT512;
2401
// #define P 34
// --- Field masks for the high 64 bits of the 128-bit BID encoding ---
#define MASK_STEERING_BITS 0x6000000000000000ull
#define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
#define MASK_BINARY_SIG1 0x001fffffffffffffull
#define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
//used to take G[2:w+3] (sec 3.3)
#define MASK_BINARY_SIG2 0x0007ffffffffffffull
//used to mask out G4:T0 (sec 3.3)
#define MASK_BINARY_OR2 0x0020000000000000ull
//used to prefix 8+G4 to T (sec 3.3)
#define UPPER_EXPON_LIMIT 51
#define MASK_EXP 0x7ffe000000000000ull
#define MASK_SPECIAL 0x7800000000000000ull
#define MASK_NAN 0x7c00000000000000ull
#define MASK_SNAN 0x7e00000000000000ull
#define MASK_ANY_INF 0x7c00000000000000ull
#define MASK_INF 0x7800000000000000ull
#define MASK_SIGN 0x8000000000000000ull
#define MASK_COEFF 0x0001ffffffffffffull
#define BIN_EXP_BIAS (0x1820ull << 49)

// --- Biased-exponent limits, stored pre-shifted into bit position 49 ---
#define EXP_MIN 0x0000000000000000ull
// EXP_MIN = (-6176 + 6176) << 49
#define EXP_MAX 0x5ffe000000000000ull
// EXP_MAX = (6111 + 6176) << 49
#define EXP_MAX_P1 0x6000000000000000ull
// EXP_MAX + 1 = (6111 + 6176 + 1) << 49
#define EXP_P1 0x0002000000000000ull
// EXP_P1 = 1 << 49 (one unit in the exponent field)
#define expmin -6176
// min unbiased exponent (decimal128)
#define expmax 6111
// max unbiased exponent (decimal128)
#define expmin16 -398
// min unbiased exponent (decimal64)
#define expmax16 369
// max unbiased exponent (decimal64)

#define SIGNMASK32 0x80000000
// largest BID64 coefficient: 0x002386F26FC0ffff == 10^16 - 1
#define BID64_SIG_MAX 0x002386F26FC0ffffull
#define SIGNMASK64 0x8000000000000000ull
2443
2444 // typedef unsigned int FPSC; // floating-point status and control
2445 // bit31:
2446 // bit30:
2447 // bit29:
2448 // bit28:
2449 // bit27:
2450 // bit26:
2451 // bit25:
2452 // bit24:
2453 // bit23:
2454 // bit22:
2455 // bit21:
2456 // bit20:
2457 // bit19:
2458 // bit18:
2459 // bit17:
2460 // bit16:
2461 // bit15:
2462 // bit14: RC:2
2463 // bit13: RC:1
2464 // bit12: RC:0
2465 // bit11: PM
2466 // bit10: UM
2467 // bit9: OM
2468 // bit8: ZM
2469 // bit7: DM
2470 // bit6: IM
2471 // bit5: PE
2472 // bit4: UE
2473 // bit3: OE
2474 // bit2: ZE
2475 // bit1: DE
2476 // bit0: IE
2477
// Rounding-control (RC) field of the FPSC word: bits 14:12 (see table above).
#define ROUNDING_MODE_MASK 0x00007000
2479
// Entry of the nr_digits[] lookup table used to count the decimal digits
// of a coefficient.  NOTE(review): threshold_hi/threshold_lo appear to be
// the 128-bit power-of-ten boundary between 'digits' and 'digits1' digit
// counts -- confirm against the table definition in the data file.
typedef struct _DEC_DIGITS {
  unsigned int digits;
  UINT64 threshold_hi;
  UINT64 threshold_lo;
  unsigned int digits1;
} DEC_DIGITS;
2486
// Lookup tables defined in the libbid data files.  Naming convention
// (NOTE(review): inferred from names; verify in the table definitions):
// ten2k* hold powers of ten, ten2mk* hold scaled approximations of
// negative powers of ten, midpoint* hold rounding midpoints, and the
// maskhigh*/onehalf*/shiftright* arrays support multi-word rounding.
extern DEC_DIGITS nr_digits[];
extern UINT64 midpoint64[];
extern UINT128 midpoint128[];
extern UINT192 midpoint192[];
extern UINT256 midpoint256[];
extern UINT64 ten2k64[];
extern UINT128 ten2k128[];
extern UINT256 ten2k256[];
extern UINT128 ten2mk128[];
extern UINT64 ten2mk64[];
extern UINT128 ten2mk128trunc[];
extern int shiftright128[];
extern UINT64 maskhigh128[];
extern UINT64 maskhigh128M[];
extern UINT64 maskhigh192M[];
extern UINT64 maskhigh256M[];
extern UINT64 onehalf128[];
extern UINT64 onehalf128M[];
extern UINT64 onehalf192M[];
extern UINT64 onehalf256M[];
extern UINT128 ten2mk128M[];
extern UINT128 ten2mk128truncM[];
extern UINT192 ten2mk192truncM[];
extern UINT256 ten2mk256truncM[];
extern int shiftright128M[];
extern int shiftright192M[];
extern int shiftright256M[];
extern UINT192 ten2mk192M[];
extern UINT256 ten2mk256M[];
extern unsigned char char_table2[];
extern unsigned char char_table3[];

extern UINT64 ten2m3k64[];
extern unsigned int shift_ten2m3k64[];
extern UINT128 ten2m3k128[];
extern unsigned int shift_ten2m3k128[];
2523
2524
2525
/***************************************************************************
 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
 ***************************************************************************/
// One table set per operand width (64/128/192/256 bits).  NOTE(review):
// Kx* appear to be scaled reciprocals of powers of ten with shift counts
// Ex*m*, plus half/mask constants and truncated 10^-x approximations --
// confirm in the corresponding data file.

extern UINT64 Kx64[];
extern unsigned int Ex64m64[];
extern UINT64 half64[];
extern UINT64 mask64[];
extern UINT64 ten2mxtrunc64[];

extern UINT128 Kx128[];
extern unsigned int Ex128m128[];
extern UINT64 half128[];
extern UINT64 mask128[];
extern UINT128 ten2mxtrunc128[];

extern UINT192 Kx192[];
extern unsigned int Ex192m192[];
extern UINT64 half192[];
extern UINT64 mask192[];
extern UINT192 ten2mxtrunc192[];

extern UINT256 Kx256[];
extern unsigned int Ex256m256[];
extern UINT64 half256[];
extern UINT64 mask256[];
extern UINT256 ten2mxtrunc256[];
2553
// A decimal operand viewed as either a BID64 or a BID128 value.
typedef union __bid64_128 {
  UINT64 b64;
  UINT128 b128;
} BID64_128;

// Mixed-format fused multiply-add.  NOTE(review): the P0..P3 arguments
// select the format of each operand/result -- confirm the convention at
// the definition site of bid_fma.
BID64_128 bid_fma (unsigned int P0,
		   BID64_128 x1, unsigned int P1,
		   BID64_128 y1, unsigned int P2,
		   BID64_128 z1, unsigned int P3,
		   unsigned int rnd_mode, FPSC * fpsc);
2564
// Precision in decimal digits of the BID64 and BID128 formats.
#define P16 16
#define P34 34

// Bit-level view of an IEEE binary64 (double), for type punning.
union __int_double {
  UINT64 i;
  double d;
};
typedef union __int_double int_double;


// Bit-level view of an IEEE binary32 (float), for type punning.
// (The float member is named 'd' to mirror __int_double.)
union __int_float {
  UINT32 i;
  float d;
};
typedef union __int_float int_float;
2580
// Exchange the values of A and B, using T as caller-supplied scratch
// storage.  Wrapped in do { } while (0) so the expansion is a single
// statement and is safe in unbraced if/else bodies (the previous
// brace-only form broke "if (c) SWAP(a,b,t); else ..." because of the
// semicolon after the closing brace).  Arguments are parenthesized to
// avoid precedence surprises.
#define SWAP(A,B,T) do { \
  (T) = (A);             \
  (A) = (B);             \
  (B) = (T);             \
} while (0)
2586
// Set A to the position of coefficient_x's leading bit, i.e.
// coefficient_x is in [2^A, 2^(A+1)): convert to float and extract the
// unbiased binary exponent.  tempx is a caller-supplied int_float
// scratch union.
// NOTE(review): exact only while the conversion to float is exact
// (coefficient_x < 2^24); larger values may round up to the next power
// of two, bumping A by one.
// Fix: the original body assigned to 'temp_x.d' instead of the 'tempx'
// parameter, silently requiring the caller to have a variable literally
// named temp_x in scope.
#define NUMBITS(A, coefficient_x, tempx) do {            \
  (tempx).d = (float) (coefficient_x);                   \
  (A) = (((tempx).i >> 23) & EXPONENT_MASK32) - 0x7f;    \
} while (0)
2593
// Operand classification results (used by the bid*_class operations);
// the ten classes cover signaling/quiet NaN and, for each sign,
// infinity, normal, subnormal and zero.
enum class_types {
  signalingNaN,
  quietNaN,
  negativeInfinity,
  negativeNormal,
  negativeSubnormal,
  negativeZero,
  positiveZero,
  positiveSubnormal,
  positiveNormal,
  positiveInfinity
};
2606
// Bit-level view of an IEEE binary64 (double) as a 64-bit integer,
// for type punning.
typedef union {
  UINT64 ui64;
  double d;
} BID_UI64DOUBLE;
2611
2612 #endif