/* Copyright (C) 2007-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef __BIDECIMAL_H
#define __BIDECIMAL_H

#include "bid_conf.h"
#include "bid_functions.h"

#define __BID_INLINE__ static __inline

/*********************************************************************
 *
 *      Logical Shift Macros
 *
 *********************************************************************/

// shift right by k bits, valid for 0 < k < 64
#define __shr_128(Q, A, k) \
{ \
  (Q).w[0] = (A).w[0] >> k; \
  (Q).w[0] |= (A).w[1] << (64 - k); \
  (Q).w[1] = (A).w[1] >> k; \
}

// shift right by k bits, valid for 0 < k < 128
#define __shr_128_long(Q, A, k) \
{ \
  if ((k) < 64) { \
    (Q).w[0] = (A).w[0] >> k; \
    (Q).w[0] |= (A).w[1] << (64 - k); \
    (Q).w[1] = (A).w[1] >> k; \
  } \
  else { \
    (Q).w[0] = (A).w[1] >> ((k) - 64); \
    (Q).w[1] = 0; \
  } \
}

// shift left by k bits, valid for 0 < k < 128
#define __shl_128_long(Q, A, k) \
{ \
  if ((k) < 64) { \
    (Q).w[1] = (A).w[1] << k; \
    (Q).w[1] |= (A).w[0] >> (64 - k); \
    (Q).w[0] = (A).w[0] << k; \
  } \
  else { \
    (Q).w[1] = (A).w[0] << ((k) - 64); \
    (Q).w[0] = 0; \
  } \
}
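// Illustrative sketch (not part of the original header): how the shift
// macros act on a 128-bit value stored as two 64-bit words, with w[0] the
// low word and w[1] the high word.  Kept under #if 0 so it is never
// compiled.
#if 0
static void
__bid_shift_example (void) {
  UINT128 a, q;
  a.w[1] = 0x0000000000000001ull;   // a = 2^64 + 2
  a.w[0] = 0x0000000000000002ull;
  __shr_128 (q, a, 1);              // valid for 0 < k < 64
  // q.w[1] = 0, q.w[0] = 0x8000000000000001: the low bit of a.w[1]
  // moved into the top of q.w[0]
  __shl_128_long (q, a, 64);        // handles shift counts >= 64 as well
  // q.w[1] = 2, q.w[0] = 0, i.e. (a << 64) truncated to 128 bits
}
#endif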

#define __low_64(Q) (Q).w[0]

/*********************************************************************
 *
 *      String Macros
 *
 *********************************************************************/
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))

/*********************************************************************
 *
 *      Compare Macros
 *
 *********************************************************************/
// greater than
//   return 0 if A <= B
//   non-zero if A > B
#define __unsigned_compare_gt_128(A, B) \
  ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
// greater-or-equal
#define __unsigned_compare_ge_128(A, B) \
  ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
#define __test_equal_128(A, B) (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten exponent range
#define __tight_bin_range_128(bp, P, bin_expon) \
{ \
UINT64 M; \
  M = 1; \
  (bp) = (bin_expon); \
  if ((bp) < 63) { \
    M <<= ((bp) + 1); \
    if ((P).w[0] >= M) (bp)++; } \
  else if ((bp) > 64) { \
    M <<= ((bp) + 1 - 64); \
    if (((P).w[1] > M) || ((P).w[1] == M && (P).w[0])) \
      (bp)++; } \
  else if ((P).w[1]) (bp)++; \
}

/*********************************************************************
 *
 *      Add/Subtract Macros
 *
 *********************************************************************/
// add 64-bit value to 128-bit
#define __add_128_64(R128, A128, B64) \
{ \
UINT64 R64H; \
  R64H = (A128).w[1]; \
  (R128).w[0] = (B64) + (A128).w[0]; \
  if ((R128).w[0] < (B64)) \
    R64H++; \
  (R128).w[1] = R64H; \
}
// subtract 64-bit value from 128-bit
#define __sub_128_64(R128, A128, B64) \
{ \
UINT64 R64H; \
  R64H = (A128).w[1]; \
  if ((A128).w[0] < (B64)) \
    R64H--; \
  (R128).w[1] = R64H; \
  (R128).w[0] = (A128).w[0] - (B64); \
}
// add 128-bit value to 128-bit
// assume no carry-out
#define __add_128_128(R128, A128, B128) \
{ \
UINT128 Q128; \
  Q128.w[1] = (A128).w[1] + (B128).w[1]; \
  Q128.w[0] = (B128).w[0] + (A128).w[0]; \
  if (Q128.w[0] < (B128).w[0]) \
    Q128.w[1]++; \
  (R128).w[1] = Q128.w[1]; \
  (R128).w[0] = Q128.w[0]; \
}
// subtract 128-bit value from 128-bit
#define __sub_128_128(R128, A128, B128) \
{ \
UINT128 Q128; \
  Q128.w[1] = (A128).w[1] - (B128).w[1]; \
  Q128.w[0] = (A128).w[0] - (B128).w[0]; \
  if ((A128).w[0] < (B128).w[0]) \
    Q128.w[1]--; \
  (R128).w[1] = Q128.w[1]; \
  (R128).w[0] = Q128.w[0]; \
}
// add with carry-out
#define __add_carry_out(S, CY, X, Y) \
{ \
UINT64 X1 = X; \
  S = X + Y; \
  CY = (S < X1) ? 1 : 0; \
}
// add with carry-in and carry-out
#define __add_carry_in_out(S, CY, X, Y, CI) \
{ \
UINT64 X1; \
  X1 = X + CI; \
  S = X1 + Y; \
  CY = ((S < X1) || (X1 < CI)) ? 1 : 0; \
}
// subtract with borrow-out
#define __sub_borrow_out(S, CY, X, Y) \
{ \
UINT64 X1 = X; \
  S = X - Y; \
  CY = (S > X1) ? 1 : 0; \
}
// subtract with borrow-in and borrow-out
#define __sub_borrow_in_out(S, CY, X, Y, CI) \
{ \
UINT64 X1, X0 = X; \
  X1 = X - CI; \
  S = X1 - Y; \
  CY = ((S > X1) || (X1 > X0)) ? 1 : 0; \
}
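// Illustrative sketch (not part of the original header): a 192-bit
// addition chained from the carry macros above, the same pattern the wide
// multiply macros below use.  UINT192 comes from bid_functions.h.  Kept
// under #if 0 so it is never compiled.
#if 0
static void
__bid_add192_example (UINT192 * r, UINT192 x, UINT192 y) {
  UINT64 cy;
  __add_carry_out (r->w[0], cy, x.w[0], y.w[0]);        // lowest word
  __add_carry_in_out (r->w[1], cy, x.w[1], y.w[1], cy); // carry propagates
  r->w[2] = x.w[2] + y.w[2] + cy;       // top word, carry-out discarded
}
#endif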
// increment C_128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
#define INCREMENT(C_128, exp) \
{ \
  C_128.w[0]++; \
  if (C_128.w[0] == 0) C_128.w[1]++; \
  if (C_128.w[1] == 0x0001ed09bead87c0ull && \
      C_128.w[0] == 0x378d8e6400000000ull) { \
    exp++; \
    C_128.w[1] = 0x0000314dc6448d93ull; \
    C_128.w[0] = 0x38c15b0a00000000ull; \
  } \
}
// decrement C_128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp) \
{ \
  C_128.w[0]--; \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
  if (C_128.w[1] == 0x0000314dc6448d93ull && \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) { \
    exp--; \
    C_128.w[1] = 0x0001ed09bead87c0ull; \
    C_128.w[0] = 0x378d8e63ffffffffull; \
  } \
}
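// Illustrative sketch (not part of the original header): INCREMENT keeps
// a BID128 coefficient below 10^34.  Since 10^34 * 10^e == 10^33 * 10^(e+1),
// rolling the coefficient back to 10^33 and bumping the exponent preserves
// the value.  Kept under #if 0 so it is never compiled.
#if 0
static void
__bid_increment_example (void) {
  UINT128 c;
  int e = 0;
  c.w[1] = 0x0001ed09bead87c0ull;   // c = 10^34 - 1
  c.w[0] = 0x378d8e63ffffffffull;
  INCREMENT (c, e);
  // now c = 10^33 (w[1] = 0x0000314dc6448d93, w[0] = 0x38c15b0a00000000)
  // and e = 1
}
#endif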

/*********************************************************************
 *
 *      Multiply Macros
 *
 *********************************************************************/
#define __mul_64x64_to_64(P64, CX, CY) (P64) = (CX) * (CY)

/***************************************
 *  Signed, Full 64x64-bit Multiply
 ***************************************/
#define __imul_64x64_to_128(P, CX, CY) \
{ \
UINT64 SX, SY; \
  __mul_64x64_to_128(P, CX, CY); \
 \
  SX = ((SINT64)(CX)) >> 63; \
  SY = ((SINT64)(CY)) >> 63; \
  SX &= CY;  SY &= CX; \
 \
  (P).w[1] = (P).w[1] - SX - SY; \
}

/***************************************
 *  Signed, Full 64x128-bit Multiply
 ***************************************/
#define __imul_64x128_full(Ph, Ql, A, B) \
{ \
UINT128 ALBL, ALBH, QM2, QM; \
 \
  __imul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __imul_64x64_to_128(ALBL, (A), (B).w[0]); \
 \
  (Ql).w[0] = ALBL.w[0]; \
  QM.w[0] = ALBL.w[1]; \
  QM.w[1] = ((SINT64)ALBL.w[1]) >> 63; \
  __add_128_128(QM2, ALBH, QM); \
  (Ql).w[1] = QM2.w[0]; \
  Ph = QM2.w[1]; \
}

/*****************************************************
 *      Unsigned Multiply Macros
 *****************************************************/
// get full 64x64-bit product
//
#define __mul_64x64_to_128(P, CX, CY) \
{ \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
 \
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM >> 32); \
  PM = (UINT64)((UINT32)PM) + PM2 + (PL >> 32); \
 \
  (P).w[1] = PH + (PM >> 32); \
  (P).w[0] = (PM << 32) + (UINT32)PL; \
}
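// Illustrative sketch (not part of the original header): the schoolbook
// decomposition used above.  With CX = CXH*2^32 + CXL and CY = CYH*2^32 + CYL,
//   CX*CY = CXH*CYH*2^64 + (CXH*CYL + CXL*CYH)*2^32 + CXL*CYL,
// and the partial sums are folded so no intermediate overflows 64 bits.
// Kept under #if 0 so it is never compiled.
#if 0
static void
__bid_mul_example (void) {
  UINT128 p;
  __mul_64x64_to_128 (p, 0xffffffffffffffffull, 0xffffffffffffffffull);
  // (2^64-1)^2 = 2^128 - 2^65 + 1:
  // p.w[1] = 0xfffffffffffffffe, p.w[0] = 0x0000000000000001
}
#endif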
// get full 64x64-bit product
// Note:
//   This macro is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_128_fast(P, CX, CY) \
{ \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
 \
  PM = CXH*CYL; \
  PL = CXL*CYL; \
  PH = CXH*CYH; \
  PM += CXL*CYH; \
  PM += (PL >> 32); \
 \
  (P).w[1] = PH + (PM >> 32); \
  (P).w[0] = (PM << 32) + (UINT32)PL; \
}
// used for CX < 2^60
#define __sqr64_fast(P, CX) \
{ \
UINT64 CXH, CXL, PL, PH, PM; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
 \
  PM = CXH*CXL; \
  PL = CXL*CXL; \
  PH = CXH*CXH; \
  PM += PM; \
  PM += (PL >> 32); \
 \
  (P).w[1] = PH + (PM >> 32); \
  (P).w[0] = (PM << 32) + (UINT32)PL; \
}
// get high 64 bits of the full 64x64-bit product
// Note:
//   This implementation is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_64_high_fast(P, CX, CY) \
{ \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
 \
  PM = CXH*CYL; \
  PL = CXL*CYL; \
  PH = CXH*CYH; \
  PM += CXL*CYH; \
  PM += (PL >> 32); \
 \
  (P) = PH + (PM >> 32); \
}
// get full 64x64-bit product
//
#define __mul_64x64_to_128_full(P, CX, CY) \
{ \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
 \
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM >> 32); \
  PM = (UINT64)((UINT32)PM) + PM2 + (PL >> 32); \
 \
  (P).w[1] = PH + (PM >> 32); \
  (P).w[0] = (PM << 32) + (UINT32)PL; \
}
#define __mul_128x128_high(Q, A, B) \
{ \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
 \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]); \
 \
  __add_128_128(QM, ALBH, AHBL); \
  __add_128_64(QM2, QM, ALBL.w[1]); \
  __add_128_64((Q), AHBH, QM2.w[1]); \
}
#define __mul_128x128_full(Qh, Ql, A, B) \
{ \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
 \
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]); \
 \
  __add_128_128(QM, ALBH, AHBL); \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, QM, ALBL.w[1]); \
  __add_128_64((Qh), AHBH, QM2.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
}
#define __mul_128x128_low(Ql, A, B) \
{ \
UINT128 ALBL; \
UINT64 QM64; \
 \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1]; \
 \
  (Ql).w[0] = ALBL.w[0]; \
  (Ql).w[1] = QM64 + ALBL.w[1]; \
}
#define __mul_64x128_low(Ql, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
}
#define __mul_64x128_full(Ph, Ql, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
 \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
 \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
  Ph = QM2.w[1]; \
}
#define __mul_64x128_to_192(Q, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
 \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
 \
  (Q).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Q).w[1] = QM2.w[0]; \
  (Q).w[2] = QM2.w[1]; \
}
#define __mul_64x128_to192(Q, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
 \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
 \
  (Q).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Q).w[1] = QM2.w[0]; \
  (Q).w[2] = QM2.w[1]; \
}
#define __mul_128x128_to_256(P256, A, B) \
{ \
UINT128 Qll, Qlh; \
UINT64 Phl, Phh, CY1, CY2; \
 \
  __mul_64x128_full(Phl, Qll, A.w[0], B); \
  __mul_64x128_full(Phh, Qlh, A.w[1], B); \
  (P256).w[0] = Qll.w[0]; \
  __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]); \
  __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1); \
  (P256).w[3] = Phh + CY2; \
}
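// Illustrative sketch (not part of the original header): the 256-bit
// product above is assembled from two 64x128 partial products, since
// A*B = A.w[1]*B*2^64 + A.w[0]*B, with the overlap resolved by one carry
// chain.  Kept under #if 0 so it is never compiled.
#if 0
static void
__bid_mul256_example (void) {
  UINT256 p;
  UINT128 a, b;
  a.w[1] = 0;
  a.w[0] = 10ull;                   // a = 10
  b.w[1] = 0x0000314dc6448d93ull;   // b = 10^33
  b.w[0] = 0x38c15b0a00000000ull;
  __mul_128x128_to_256 (p, a, b);
  // p = 10^34: p.w[3] = p.w[2] = 0,
  // p.w[1] = 0x0001ed09bead87c0, p.w[0] = 0x378d8e6400000000
}
#endif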
//
// For better performance, this checks A.w[1] against 0 but not B.w[1];
// use the macro accordingly
#define __mul_128x128_to_256_check_A(P256, A, B) \
{ \
UINT128 Qll, Qlh; \
UINT64 Phl, Phh, CY1, CY2; \
 \
  __mul_64x128_full(Phl, Qll, A.w[0], B); \
  (P256).w[0] = Qll.w[0]; \
  if (A.w[1]) { \
    __mul_64x128_full(Phh, Qlh, A.w[1], B); \
    __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]); \
    __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1); \
    (P256).w[3] = Phh + CY2; } \
  else { \
    (P256).w[1] = Qll.w[1]; \
    (P256).w[2] = Phl; \
    (P256).w[3] = 0; } \
}
#define __mul_64x192_to_256(lP, lA, lB) \
{ \
UINT128 lP0, lP1, lP2; \
UINT64 lC; \
  __mul_64x64_to_128(lP0, lA, (lB).w[0]); \
  __mul_64x64_to_128(lP1, lA, (lB).w[1]); \
  __mul_64x64_to_128(lP2, lA, (lB).w[2]); \
  (lP).w[0] = lP0.w[0]; \
  __add_carry_out((lP).w[1], lC, lP1.w[0], lP0.w[1]); \
  __add_carry_in_out((lP).w[2], lC, lP2.w[0], lP1.w[1], lC); \
  (lP).w[3] = lP2.w[1] + lC; \
}
#define __mul_64x256_to_320(P, A, B) \
{ \
UINT128 lP0, lP1, lP2, lP3; \
UINT64 lC; \
  __mul_64x64_to_128(lP0, A, (B).w[0]); \
  __mul_64x64_to_128(lP1, A, (B).w[1]); \
  __mul_64x64_to_128(lP2, A, (B).w[2]); \
  __mul_64x64_to_128(lP3, A, (B).w[3]); \
  (P).w[0] = lP0.w[0]; \
  __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]); \
  __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC); \
  __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC); \
  (P).w[4] = lP3.w[1] + lC; \
}
#define __mul_192x192_to_384(P, A, B) \
{ \
UINT256 P0, P1, P2; \
UINT64 CY; \
  __mul_64x192_to_256(P0, (A).w[0], B); \
  __mul_64x192_to_256(P1, (A).w[1], B); \
  __mul_64x192_to_256(P2, (A).w[2], B); \
  (P).w[0] = P0.w[0]; \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]); \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY); \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY); \
  (P).w[4] = P1.w[3] + CY; \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]); \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY); \
  (P).w[5] = P2.w[3] + CY; \
}
#define __mul_64x320_to_384(P, A, B) \
{ \
UINT128 lP0, lP1, lP2, lP3, lP4; \
UINT64 lC; \
  __mul_64x64_to_128(lP0, A, (B).w[0]); \
  __mul_64x64_to_128(lP1, A, (B).w[1]); \
  __mul_64x64_to_128(lP2, A, (B).w[2]); \
  __mul_64x64_to_128(lP3, A, (B).w[3]); \
  __mul_64x64_to_128(lP4, A, (B).w[4]); \
  (P).w[0] = lP0.w[0]; \
  __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]); \
  __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC); \
  __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC); \
  __add_carry_in_out((P).w[4], lC, lP4.w[0], lP3.w[1], lC); \
  (P).w[5] = lP4.w[1] + lC; \
}
// A*A
// Full 128x128-bit product
#define __sqr128_to_256(P256, A) \
{ \
UINT128 Qll, Qlh, Qhh; \
UINT64 TMP_C1, TMP_C2; \
 \
  __mul_64x64_to_128(Qhh, A.w[1], A.w[1]); \
  __mul_64x64_to_128(Qlh, A.w[0], A.w[1]); \
  Qhh.w[1] += (Qlh.w[1] >> 63); \
  Qlh.w[1] = (Qlh.w[1] + Qlh.w[1]) | (Qlh.w[0] >> 63); \
  Qlh.w[0] += Qlh.w[0]; \
  __mul_64x64_to_128(Qll, A.w[0], A.w[0]); \
 \
  __add_carry_out((P256).w[1], TMP_C1, Qlh.w[0], Qll.w[1]); \
  (P256).w[0] = Qll.w[0]; \
  __add_carry_in_out((P256).w[2], TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
  (P256).w[3] = Qhh.w[1] + TMP_C2; \
}
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B) \
{ \
UINT128 Qll, Qlh; \
UINT64 Phl, Phh, C1, C2; \
 \
  __mul_64x128_full(Phl, Qll, A.w[0], B); \
  __mul_64x128_full(Phh, Qlh, A.w[1], B); \
  (PQl).w[0] = Qll.w[0]; \
  __add_carry_out((PQl).w[1], C1, Qlh.w[0], Qll.w[1]); \
  __add_carry_in_out((PQh).w[0], C2, Qlh.w[1], Phl, C1); \
  (PQh).w[1] = Phh + C2; \
}
#define __mul_256x256_to_512(P, A, B) \
{ \
UINT512 P0, P1, P2, P3; \
UINT64 CY; \
  __mul_64x256_to_320(P0, (A).w[0], B); \
  __mul_64x256_to_320(P1, (A).w[1], B); \
  __mul_64x256_to_320(P2, (A).w[2], B); \
  __mul_64x256_to_320(P3, (A).w[3], B); \
  (P).w[0] = P0.w[0]; \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]); \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY); \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY); \
  (P).w[5] = P1.w[4] + CY; \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]); \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY); \
  __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY); \
  (P).w[6] = P2.w[4] + CY; \
  __add_carry_out((P).w[3], CY, P3.w[0], (P).w[3]); \
  __add_carry_in_out((P).w[4], CY, P3.w[1], (P).w[4], CY); \
  __add_carry_in_out((P).w[5], CY, P3.w[2], (P).w[5], CY); \
  __add_carry_in_out((P).w[6], CY, P3.w[3], (P).w[6], CY); \
  (P).w[7] = P3.w[4] + CY; \
}
#define __mul_192x256_to_448(P, A, B) \
{ \
UINT512 P0, P1, P2; \
UINT64 CY; \
  __mul_64x256_to_320(P0, (A).w[0], B); \
  __mul_64x256_to_320(P1, (A).w[1], B); \
  __mul_64x256_to_320(P2, (A).w[2], B); \
  (P).w[0] = P0.w[0]; \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]); \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY); \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY); \
  (P).w[5] = P1.w[4] + CY; \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]); \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY); \
  __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY); \
  (P).w[6] = P2.w[4] + CY; \
}
#define __mul_320x320_to_640(P, A, B) \
{ \
UINT512 P0, P1, P2, P3; \
UINT64 CY; \
  __mul_256x256_to_512((P), (A), B); \
  __mul_64x256_to_320(P1, (A).w[4], B); \
  __mul_64x256_to_320(P2, (B).w[4], A); \
  __mul_64x64_to_128(P3, (A).w[4], (B).w[4]); \
  __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]); \
  __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY); \
  __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY); \
  __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY); \
  __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY); \
  P3.w[1] += CY; \
  __add_carry_out((P).w[4], CY, (P).w[4], P0.w[0]); \
  __add_carry_in_out((P).w[5], CY, (P).w[5], P0.w[1], CY); \
  __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[2], CY); \
  __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[3], CY); \
  __add_carry_in_out((P).w[8], CY, P3.w[0], P0.w[4], CY); \
  (P).w[9] = P3.w[1] + CY; \
}
#define __mul_384x384_to_768(P, A, B) \
{ \
UINT512 P0, P1, P2, P3; \
UINT64 CY; \
  __mul_320x320_to_640((P), (A), B); \
  __mul_64x320_to_384(P1, (A).w[5], B); \
  __mul_64x320_to_384(P2, (B).w[5], A); \
  __mul_64x64_to_128(P3, (A).w[5], (B).w[5]); \
  __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]); \
  __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY); \
  __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY); \
  __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY); \
  __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY); \
  __add_carry_in_out((P0).w[5], CY, P1.w[5], P2.w[5], CY); \
  P3.w[1] += CY; \
  __add_carry_out((P).w[5], CY, (P).w[5], P0.w[0]); \
  __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[1], CY); \
  __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[2], CY); \
  __add_carry_in_out((P).w[8], CY, (P).w[8], P0.w[3], CY); \
  __add_carry_in_out((P).w[9], CY, (P).w[9], P0.w[4], CY); \
  __add_carry_in_out((P).w[10], CY, P3.w[0], P0.w[5], CY); \
  (P).w[11] = P3.w[1] + CY; \
}
#define __mul_64x128_short(Ql, A, B) \
{ \
UINT64 ALBH_L; \
 \
  __mul_64x64_to_64(ALBH_L, (A), (B).w[1]); \
  __mul_64x64_to_128((Ql), (A), (B).w[0]); \
 \
  (Ql).w[1] += ALBH_L; \
}
#define __scale128_10(D, _TMP) \
{ \
UINT128 _TMP2, _TMP8; \
  _TMP2.w[1] = (_TMP.w[1] << 1) | (_TMP.w[0] >> 63); \
  _TMP2.w[0] = _TMP.w[0] << 1; \
  _TMP8.w[1] = (_TMP.w[1] << 3) | (_TMP.w[0] >> 61); \
  _TMP8.w[0] = _TMP.w[0] << 3; \
  __add_128_128(D, _TMP2, _TMP8); \
}
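// Illustrative sketch (not part of the original header): __scale128_10
// multiplies by 10 without a multiply instruction, as 10*x = 8*x + 2*x;
// the same trick appears inline elsewhere in this file as
// (coeff << 3) + (coeff << 1).  Kept under #if 0 so it is never compiled.
#if 0
static void
__bid_scale10_example (void) {
  UINT128 d, x;
  x.w[1] = 0;
  x.w[0] = 123ull;
  __scale128_10 (d, x);
  // d.w[1] = 0, d.w[0] = 1230
}
#endif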
// full 64x64-bit product
#define __mul_64x64_to_128MACH(P128, CX64, CY64) \
{ \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX64) >> 32; \
  CXL = (UINT32)(CX64); \
  CYH = (CY64) >> 32; \
  CYL = (UINT32)(CY64); \
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM >> 32); \
  PM = (UINT64)((UINT32)PM) + PM2 + (PL >> 32); \
  (P128).w[1] = PH + (PM >> 32); \
  (P128).w[0] = (PM << 32) + (UINT32)PL; \
}
// high 64 bits of the 64x64-bit product
#define __mul_64x64_to_128HIGH(P64, CX64, CY64) \
{ \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX64) >> 32; \
  CXL = (UINT32)(CX64); \
  CYH = (CY64) >> 32; \
  CYL = (UINT32)(CY64); \
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM >> 32); \
  PM = (UINT64)((UINT32)PM) + PM2 + (PL >> 32); \
  P64 = PH + (PM >> 32); \
}
#define __mul_128x64_to_128(Q128, A64, B128) \
{ \
UINT64 ALBH_L; \
  ALBH_L = (A64) * (B128).w[1]; \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
  (Q128).w[1] += ALBH_L; \
}
// might simplify by calculating just QM2.w[0]
#define __mul_64x128_to_128(Ql, A, B) \
{ \
UINT128 ALBL, ALBH, QM2; \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
}

/*********************************************************************
 *
 *      BID Pack/Unpack Macros
 *
 *********************************************************************/
/////////////////////////////////////////
// BID64 definitions
/////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64 767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS 16
/////////////////////////////////////////
// BID128 definitions
/////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128 12287
#define DECIMAL_EXPONENT_BIAS_128 6176
#define MAX_FORMAT_DIGITS_128 34
/////////////////////////////////////////
// BID32 definitions
/////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32 191
#define DECIMAL_EXPONENT_BIAS_32 101
#define MAX_FORMAT_DIGITS_32 7
/////////////////////////////////////////
// Constant Definitions
/////////////////////////////////////////
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64 0x7800000000000000ull
#define SINFINITY_MASK64 0xf800000000000000ull
#define SSNAN_MASK64 0xfc00000000000000ull
#define NAN_MASK64 0x7c00000000000000ull
#define SNAN_MASK64 0x7e00000000000000ull
#define QUIET_MASK64 0xfdffffffffffffffull
#define LARGE_COEFF_MASK64 0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64 0x0020000000000000ull
#define SMALL_COEFF_MASK64 0x001fffffffffffffull
#define EXPONENT_MASK64 0x3ff
#define EXPONENT_SHIFT_LARGE64 51
#define EXPONENT_SHIFT_SMALL64 53
#define LARGEST_BID64 0x77fb86f26fc0ffffull
#define SMALLEST_BID64 0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128 0x0001ffffffffffffull
#define LARGE_COEFF_MASK128 0x00007fffffffffffull
#define EXPONENT_MASK128 0x3fff
#define LARGEST_BID128_HIGH 0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW 0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32 0x78000000ul
#define LARGE_COEFF_MASK32 0x007ffffful
#define LARGE_COEFF_HIGH_BIT32 0x00800000ul
#define SMALL_COEFF_MASK32 0x001ffffful
#define EXPONENT_MASK32 0xff
#define LARGEST_BID32 0x77f8967f
#define NAN_MASK32 0x7c000000
#define SNAN_MASK32 0x7e000000
#define MASK_BINARY_EXPONENT 0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS 0x3ff
#define UPPER_EXPON_LIMIT 51
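// Illustrative sketch (not part of the original header): how the masks
// above select between the two BID64 layouts.  For coefficients below 2^53
// the word is [sign | 10-bit exponent | 53-bit coefficient]; when bits
// 62:61 are both set (SPECIAL_ENCODING_MASK64) the exponent sits two bits
// lower and the coefficient field is ORed with LARGE_COEFF_HIGH_BIT64
// (implicit leading bits 100).  Kept under #if 0 so it is never compiled.
#if 0
static void
__bid_layout_example (void) {
  // 1 * 10^0: biased exponent 398, coefficient 1, small form
  UINT64 one = (((UINT64) DECIMAL_EXPONENT_BIAS) << EXPONENT_SHIFT_SMALL64)
    | 1ull;
  // one == 0x31c0000000000001
}
#endif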
// data needed for BID pack/unpack macros
extern UINT64 round_const_table[][19];
extern UINT128 reciprocals10_128[];
extern int recip_scale[];
extern UINT128 power10_table_128[];
extern int estimate_decimal_digits[];
extern int estimate_bin_expon[];
extern UINT64 power10_index_binexp[];
extern int short_recip_scale[];
extern UINT64 reciprocals10_64[];
extern UINT128 power10_index_binexp_128[];
extern UINT128 round_const_table_128[][36];


//////////////////////////////////////////////
//  Status Flag Handling
//////////////////////////////////////////////
#define __set_status_flags(fpsc, status) *(fpsc) |= status
#define is_inexact(fpsc) ((*(fpsc)) & INEXACT_EXCEPTION)

__BID_INLINE__ UINT64
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
              UINT64 * pcoefficient_x, UINT64 x) {
  UINT64 tmp, coeff;

  *psign_x = x & 0x8000000000000000ull;

  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
    // special encodings
    // coefficient
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;

    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      *pexponent_x = 0;
      *pcoefficient_x = x & 0xfe03ffffffffffffull;
      if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
        *pcoefficient_x = x & 0xfe00000000000000ull;
      if ((x & NAN_MASK64) == INFINITY_MASK64)
        *pcoefficient_x = x & SINFINITY_MASK64;
      return 0;	// NaN or Infinity
    }
    // check for non-canonical values
    if (coeff >= 10000000000000000ull)
      coeff = 0;
    *pcoefficient_x = coeff;
    // get exponent
    tmp = x >> EXPONENT_SHIFT_LARGE64;
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
    return coeff;
  }
  // exponent
  tmp = x >> EXPONENT_SHIFT_SMALL64;
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
  // coefficient
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);

  return *pcoefficient_x;
}
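// Illustrative sketch (not part of the original header): driving
// unpack_BID64.  The return value is nonzero for a finite nonzero input;
// zero coefficients, NaN and Infinity all return 0, with specials reported
// through *pcoefficient_x.  Kept under #if 0 so it is never compiled.
#if 0
static void
__bid_unpack64_example (void) {
  UINT64 sign, coeff, nz;
  int expon;
  UINT64 x = 0x31c0000000000001ull;   // 1 * 10^0 (biased exponent 398)
  nz = unpack_BID64 (&sign, &expon, &coeff, x);
  // nz != 0, sign = 0, expon = 398, coeff = 1
}
#endif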

//
//   BID64 pack macro (general form)
//
__BID_INLINE__ UINT64
get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
           unsigned *fpsc) {
  UINT128 Stemp, Q_low;
  UINT64 QH, r, mask, C64, remainder_h, CY, carry;
  int extra_digits, amount, amount2;
  unsigned status;

  if (coeff > 9999999999999999ull) {
    expon++;
    coeff = 1000000000000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
        if (rmode == ROUNDING_DOWN && sgn)
          return 0x8000000000000001ull;
        if (rmode == ROUNDING_UP && !sgn)
          return 1ull;
#endif
#endif
        // result is 0
        return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
        rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, coeff,
                         reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
        if (C64 & 1) {
          // check whether fractional part of initial_P/10^extra_digits
          // is exactly .5

          // get remainder
          amount2 = 64 - amount;
          remainder_h = 0;
          remainder_h--;
          remainder_h >>= amount2;
          remainder_h = remainder_h & QH;

          if (!remainder_h
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0]))) {
            C64--;
          }
        }
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
        status = INEXACT_EXCEPTION;
        // get remainder
        remainder_h = QH << (64 - amount);

        switch (rmode) {
        case ROUNDING_TO_NEAREST:
        case ROUNDING_TIES_AWAY:
          // test whether fractional part is 0
          if (remainder_h == 0x8000000000000000ull
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        case ROUNDING_DOWN:
        case ROUNDING_TO_ZERO:
          if (!remainder_h
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        default:
          // round up
          __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
                           reciprocals10_128[extra_digits].w[0]);
          __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
                              reciprocals10_128[extra_digits].w[1], CY);
          if ((remainder_h >> (64 - amount)) + carry >=
              (((UINT64) 1) << amount))
            status = EXACT_STATUS;
        }

        if (status != EXACT_STATUS)
          __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn)
          r = LARGEST_BID64;
        break;
      case ROUNDING_TO_ZERO:
        r = sgn | LARGEST_BID64;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn)
          r = SMALLEST_BID64;
      }
      return r;
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 = 53 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff == 10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
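// Illustrative sketch (not part of the original header): packing a result
// with the general form.  With a biased exponent inside [0, 767] and a
// coefficient below 2^53, get_BID64 reduces to the small-coefficient
// layout; rounding mode and status flags only matter on underflow or
// overflow.  Kept under #if 0 so it is never compiled.
#if 0
static void
__bid_pack64_example (void) {
  unsigned fpsc = 0;
  // +1234567890123456 * 10^(0-398), biased exponent 0
  UINT64 r = get_BID64 (0, 0, 1234567890123456ull,
                        ROUNDING_TO_NEAREST, &fpsc);
  // r = (0 << 53) | 1234567890123456, fpsc unchanged
}
#endif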




//
//   No overflow/underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 = 53 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff == 10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   no underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
                         unsigned *fpsc) {
  UINT64 r, mask;

  if (((unsigned) expon) >= 3 * 256 - 1) {
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
        expon--;
        coeff = (coeff << 3) + (coeff << 1);
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
        // overflow
        r = sgn | INFINITY_MASK64;
        switch (rmode) {
        case ROUNDING_DOWN:
          if (!sgn)
            r = LARGEST_BID64;
          break;
        case ROUNDING_TO_ZERO:
          r = sgn | LARGEST_BID64;
          break;
        case ROUNDING_UP:
          // round up
          if (sgn)
            r = SMALLEST_BID64;
        }
        return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 = 53 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff == 10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   No overflow/underflow checking
//   or checking for coefficients equal to 10^16 (after rounding)
//
__BID_INLINE__ UINT64
very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 = 53 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}

//
//   No overflow/underflow checking or checking for coefficients above 2^53
//
__BID_INLINE__ UINT64
very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
  // no UF/OF
  UINT64 r;

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);
  return r;
}


//
//   This pack macro is used when underflow is known to occur
//
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
              unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow
  if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  if (R)
    coeff |= 1;
  // get digits to be shifted out
  extra_digits = 1 - expon;
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
                     reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits
      // is exactly .5

      // get remainder
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;
      remainder_h >>= amount2;
      remainder_h = remainder_h & QH;

      if (!remainder_h
          && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
              || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                  && Q_low.w[0] <
                  reciprocals10_128[extra_digits].w[0]))) {
        C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (remainder_h == 0x8000000000000000ull
          && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
              || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                  && Q_low.w[0] <
                  reciprocals10_128[extra_digits].w[0])))
        status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
          && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
              || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                  && Q_low.w[0] <
                  reciprocals10_128[extra_digits].w[0])))
        status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
                       reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
                          reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
          (((UINT64) 1) << amount))
        status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | C64;

}



//
//   This pack macro does not check for coefficients above 2^53
//
__BID_INLINE__ UINT64
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
                          int rmode, unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
        if (rmode == ROUNDING_DOWN && sgn)
          return 0x8000000000000001ull;
        if (rmode == ROUNDING_UP && !sgn)
          return 1ull;
#endif
#endif
        // result is 0
        return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
        rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      C128.w[0] = coeff + round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, C128.w[0],
                         reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
        if (C64 & 1) {
          // check whether fractional part of initial_P/10^extra_digits
          // is exactly .5

          // get remainder
          amount2 = 64 - amount;
          remainder_h = 0;
          remainder_h--;
          remainder_h >>= amount2;
          remainder_h = remainder_h & QH;

          if (!remainder_h
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0]))) {
            C64--;
          }
        }
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
        status = INEXACT_EXCEPTION;
        // get remainder
        remainder_h = QH << (64 - amount);

        switch (rmode) {
        case ROUNDING_TO_NEAREST:
        case ROUNDING_TIES_AWAY:
          // test whether fractional part is 0
          if (remainder_h == 0x8000000000000000ull
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        case ROUNDING_DOWN:
        case ROUNDING_TO_ZERO:
          if (!remainder_h
              && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
                  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
                      && Q_low.w[0] <
                      reciprocals10_128[extra_digits].w[0])))
            status = EXACT_STATUS;
          break;
        default:
          // round up
          __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
                           reciprocals10_128[extra_digits].w[0]);
          __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
                              reciprocals10_128[extra_digits].w[1], CY);
          if ((remainder_h >> (64 - amount)) + carry >=
              (((UINT64) 1) << amount))
            status = EXACT_STATUS;
        }

        if (status != EXACT_STATUS)
          __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }

    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn)
          r = LARGEST_BID64;
        break;
      case ROUNDING_TO_ZERO:
        r = sgn | LARGEST_BID64;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn)
          r = SMALLEST_BID64;
      }
      return r;
    } else {
      mask = 1;
      mask <<= EXPONENT_SHIFT_SMALL64;
      if (coeff >= mask) {
        r = expon;
        r <<= EXPONENT_SHIFT_LARGE64;
        r |= (sgn | SPECIAL_ENCODING_MASK64);
        // add coeff, without leading bits
        mask = (mask >> 2) - 1;
        coeff &= mask;
        r |= coeff;
        return r;
      }
    }
  }

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);

  return r;
}


/*****************************************************************************
 *
 *    BID128 pack/unpack macros
 *
 *****************************************************************************/

//
//   Macro for handling BID128 underflow
//   sticky bit given as additional argument
//
__BID_INLINE__ UINT128 *
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
                   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
        || (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder
  if (R)
    CQ.w[0] |= 1;

  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
        CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
                       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
                          reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
        Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
        status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}


//
//   Macro for handling BID128 underflow
//
__BID_INLINE__ UINT128 *
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
               unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
        || (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
        CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
          && (Ql.w[1] < reciprocals10_128[ed2].w[1]
              || (Ql.w[1] == reciprocals10_128[ed2].w[1]
                  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
        status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
                       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
                          reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
        Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
        status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}



//
//   BID128 unpack, input passed by value
//
__BID_INLINE__ UINT64
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
                     UINT128 * pcoefficient_x, UINT128 x) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (x.w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = power10_table_128[33];
    /*coeff.w[0] = x.w[0];
       coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
       pcoefficient_x->w[0] = x.w[0];
       pcoefficient_x->w[1] = x.w[1];
       if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
       pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128); */

    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
    {
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
      pcoefficient_x->w[0] = 0;
    } else
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
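// Illustrative sketch (not part of the original header): a canonical
// finite BID128 input to unpack_BID128_value.  The biased exponent
// occupies bits 62:49 of the high word, so a value with unbiased
// exponent 0 places DECIMAL_EXPONENT_BIAS_128 there.  Kept under #if 0
// so it is never compiled.
#if 0
static void
__bid_unpack128_example (void) {
  UINT64 sign, nz;
  int expon;
  UINT128 coeff, x;
  x.w[1] = ((UINT64) DECIMAL_EXPONENT_BIAS_128) << 49;	// exponent field
  x.w[0] = 5ull;	// coefficient 5, i.e. x = 5 * 10^0
  nz = unpack_BID128_value (&sign, &expon, &coeff, x);
  // nz = 5, sign = 0, expon = 6176 (biased),
  // coeff.w[0] = 5, coeff.w[1] = 0
}
#endif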


//
//   BID128 unpack, input passed by reference
//
__BID_INLINE__ UINT64
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
               UINT128 * pcoefficient_x, UINT128 * px) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (px->w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (px->w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = power10_table_128[33];
    coeff.w[0] = px->w[0];
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = px->w[0];
    pcoefficient_x->w[1] = px->w[1];
    if (__unsigned_compare_ge_128 (coeff, T33)) {	// non-canonical
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
      pcoefficient_x->w[0] = 0;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = px->w[0];
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (px->w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}

//
// Pack macro checks for overflow, but not underflow
//
__BID_INLINE__ UINT128 *
get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
			 UINT128 coeff, unsigned *prounding_mode,
			 unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)
	  coeff.w[1]++;

	expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP)
	  || (!sgn && *prounding_mode == ROUNDING_DOWN)) {
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
	pres->w[1] = sgn | INFINITY_MASK64;
	pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
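
//
// Note on the scaling loop above: the coefficient is multiplied by 10 per
// iteration without a full multiply, using 10*x == 8*x + 2*x.  The high
// word accumulates (w1 << 3) + (w1 << 1) plus the bits shifted out of the
// low word ((w0 >> 61) from the *8 term, (w0 >> 63) from the *2 term), and
// the carry out of the low-word addition is recovered from the unsigned
// wrap-around test (w0 < tmp2).  The 64-bit form of the same identity, as
// a sketch:
//
//   static UINT64 times10_sketch (UINT64 x) {
//     return (x << 3) + (x << 1);	// 8*x + 2*x == 10*x
//   }
//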


//
// No overflow/underflow checks
// No checking for coefficient == 10^34 (rounding artifact)
//
__BID_INLINE__ UINT128 *
get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
		      UINT128 coeff) {
  UINT64 tmp;

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}

//
// No overflow/underflow checks
//
__BID_INLINE__ UINT128 *
get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT64 tmp;

  // coeff == 10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
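
//
// Worked example for the test above: a coefficient that rounded up to
// exactly 10^34 = 0x0001ed09bead87c0378d8e6400000000 no longer fits in 34
// decimal digits, but since 10^34 * 10^e == 10^33 * 10^(e+1) it is replaced
// by 10^33 = 0x0000314dc6448d9338c15b0a00000000 with the exponent
// incremented -- the same value, in canonical form.
//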

//
// General BID128 pack macro
//
__BID_INLINE__ UINT128 *
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
	    unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  // coeff == 10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0) {
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
			    fpsc);
    }

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)
	  coeff.w[1]++;

	expon--;
      }
    }
    if (expon > DECIMAL_MAX_EXPON_128) {
      if (!(coeff.w[1] | coeff.w[0])) {
	// zero coefficient: clamp the exponent to the maximum
	pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
	pres->w[0] = 0;
	return pres;
      }
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP)
	  || (!sgn && *prounding_mode == ROUNDING_DOWN)) {
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
	pres->w[1] = sgn | INFINITY_MASK64;
	pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
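
//
// Example (illustrative sketch, not part of the library; rmode and fpsc are
// hypothetical locals): packing +1 with get_BID128.  The exponent argument
// is already biased (bias 6176 for BID128):
//
//   UINT128 res, coeff;
//   unsigned rmode = ROUNDING_TO_NEAREST, fpsc = 0;
//   coeff.w[1] = 0;
//   coeff.w[0] = 1;
//   get_BID128 (&res, 0, 6176, coeff, &rmode, &fpsc);
//   // res.w[1] == 0x3040000000000000ull, res.w[0] == 1, no flags set
//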


//
// Macro used for conversions from string
// (no additional arguments given for rounding mode, status flags)
//
__BID_INLINE__ UINT128 *
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT128 D2, D8;
  UINT64 tmp;
  unsigned rmode = 0, status;

  // coeff == 10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      while (expon > DECIMAL_MAX_EXPON_128 &&
	     (coeff.w[1] < power10_table_128[33].w[1] ||
	      (coeff.w[1] == power10_table_128[33].w[1]
	       && coeff.w[0] < power10_table_128[33].w[0]))) {
	// scale the coefficient by 10: coeff = 2*coeff + 8*coeff
	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
	D2.w[0] = coeff.w[0] << 1;
	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
	D8.w[0] = coeff.w[0] << 3;

	__add_128_128 (coeff, D2, D8);
	expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      expon = DECIMAL_MAX_EXPON_128;

    if (expon > DECIMAL_MAX_EXPON_128) {
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn) {
	  pres->w[1] = LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      case ROUNDING_TO_ZERO:
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn) {
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      }

      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
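
//
// On the overflow path above the result saturates either to infinity or to
// the largest finite BID128 value (10^34 - 1) * 10^6111, depending on the
// sign and rounding direction.  As a consistency check on the constants:
// the maximum biased exponent is 6111 + 6176 = 12287 = 0x2fff, and
// (0x2fffull << 49) OR-ed with the high 64 bits of 10^34 - 1 gives
//
//   0x5ffe000000000000 | 0x0001ed09bead87c0 = 0x5fffed09bead87c0
//
// which should match LARGEST_BID128_HIGH as defined elsewhere in the
// library, with LARGEST_BID128_LOW the low 64 bits 0x378d8e63ffffffffull.
//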


/*****************************************************************************
*
* BID32 pack/unpack macros
*
*****************************************************************************/

__BID_INLINE__ UINT32
unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
	      UINT32 * pcoefficient_x, UINT32 x) {
  UINT32 tmp;

  *psign_x = x & 0x80000000;

  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
    // special encodings
    if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
      *pcoefficient_x = x & 0xfe0fffff;
      if ((x & 0x000fffff) >= 1000000)
	*pcoefficient_x = x & 0xfe000000;
      if ((x & NAN_MASK32) == INFINITY_MASK32)
	*pcoefficient_x = x & 0xf8000000;
      *pexponent_x = 0;
      return 0;	// NaN or Infinity
    }
    // coefficient
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
    // check for non-canonical value
    if (*pcoefficient_x >= 10000000)
      *pcoefficient_x = 0;
    // get exponent
    tmp = x >> 21;
    *pexponent_x = tmp & EXPONENT_MASK32;
    return 1;
  }
  // exponent
  tmp = x >> 23;
  *pexponent_x = tmp & EXPONENT_MASK32;
  // coefficient
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);

  return *pcoefficient_x;
}
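
//
// Example (illustrative, not part of the library): unpacking the canonical
// BID32 encoding of +1, 0x32800001 -- sign bit 0, biased exponent
// 101 = 0x65 in bits 30:23 (bias 101), coefficient 1 in bits 22:0:
//
//   UINT32 sign, coefficient;
//   int exponent;
//   unpack_BID32 (&sign, &exponent, &coefficient, 0x32800001);
//   // sign == 0, exponent == 101, coefficient == 1
//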

//
// General pack macro for BID32
//
__BID_INLINE__ UINT32
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  UINT128 Q;
  UINT64 C64, remainder_h, carry, Stemp;
  UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  // coeff == 10^7 (rounding artifact)? replace by 10^6, exponent + 1
  if (coeff > 9999999ull) {
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x80000001;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1;
#endif
#endif
	// result is 0
	return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // negative operands: swap rounding down and rounding up
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif

      extra_digits = -expon;
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = short_recip_scale[extra_digits];

      C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	// ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits
	  // is exactly .5

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & Q.w[1];

	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = Q.w[1] << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp, carry, Q.w[0],
			   reciprocals10_64[extra_digits]);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | (UINT32) C64;
    }

    // overflow: try to bring the exponent back into range
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID32;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID32;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((UINT32) coeff | sgn);
    return r;
  }
  // special format

  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((UINT32) coeff) & mask);

  return r;
}
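
//
// The underflow path above removes extra_digits decimal digits from the
// coefficient without an integer divide: it adds a rounding constant
// (10^k/2 for round-to-nearest), multiplies by the precomputed reciprocal
// reciprocals10_64[k] ~= 2^(64+s)/10^k, and keeps the high bits of the
// product.  A truncating sketch under those table definitions
// (hypothetical helper, for exposition only):
//
//   static UINT64 div_by_pow10_sketch (UINT64 coeff, int k) {
//     UINT128 Q;
//     __mul_64x64_to_128 (Q, coeff, reciprocals10_64[k]);
//     return Q.w[1] >> short_recip_scale[k];	// == coeff / 10^k
//   }
//
// e.g. coeff = 1234567, k = 2 yields 12345; first adding the
// round-to-nearest constant round_const_table[0][2] (= 10^2/2 = 50) yields
// 12346, the correctly rounded result for 12345.67.
//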



//
// no overflow/underflow checks
//
__BID_INLINE__ UINT32
very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
  UINT32 r, mask;

  mask = 1 << 23;

  // check whether coefficient fits in the 23-bit field
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
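
//
// Example of the format split above: coefficient 1 fits in the 23-bit field
// directly, while 9999999 = 0x98967f needs 24 bits; its top three bits are
// the implied 0b100 of the special format, so only the low 21 bits are
// stored after the steering bits:
//
//   very_fast_get_BID32 (0, 101, 1);		// -> 0x32800001, i.e. +1
//   very_fast_get_BID32 (0, 101, 9999999);	// -> 0x6cb8967f, i.e. +9999999
//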



/*************************************************************
 *
 *************************************************************/
typedef ALIGN (16)
struct {
  UINT64 w[6];
} UINT384;

typedef ALIGN (16)
struct {
  UINT64 w[8];
} UINT512;

// #define P 34
#define MASK_STEERING_BITS 0x6000000000000000ull
#define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
#define MASK_BINARY_SIG1 0x001fffffffffffffull
#define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
  // used to take G[2:w+3] (sec 3.3)
#define MASK_BINARY_SIG2 0x0007ffffffffffffull
  // used to mask out G4:T0 (sec 3.3)
#define MASK_BINARY_OR2 0x0020000000000000ull
  // used to prefix 8+G4 to T (sec 3.3)
#define UPPER_EXPON_LIMIT 51
#define MASK_EXP 0x7ffe000000000000ull
#define MASK_SPECIAL 0x7800000000000000ull
#define MASK_NAN 0x7c00000000000000ull
#define MASK_SNAN 0x7e00000000000000ull
#define MASK_ANY_INF 0x7c00000000000000ull
#define MASK_INF 0x7800000000000000ull
#define MASK_SIGN 0x8000000000000000ull
#define MASK_COEFF 0x0001ffffffffffffull
#define BIN_EXP_BIAS (0x1820ull << 49)

#define EXP_MIN 0x0000000000000000ull
  // EXP_MIN = (-6176 + 6176) << 49
#define EXP_MAX 0x5ffe000000000000ull
  // EXP_MAX = (6111 + 6176) << 49
#define EXP_MAX_P1 0x6000000000000000ull
  // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
#define EXP_P1 0x0002000000000000ull
  // EXP_P1 = 1 << 49
#define expmin -6176
  // min unbiased exponent (decimal128)
#define expmax 6111
  // max unbiased exponent (decimal128)
#define expmin16 -398
  // min unbiased exponent (decimal64)
#define expmax16 369
  // max unbiased exponent (decimal64)
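
//
// Consistency of the constants above: the BID128 exponent bias is 6176, so
// EXP_MAX encodes expmax = 6111 as (6111 + 6176) << 49 = 12287 << 49 =
// 0x5ffe000000000000, and EXP_MIN encodes expmin = -6176 as
// (-6176 + 6176) << 49 = 0.  EXP_P1 = 1 << 49 is one unit in the last place
// of the biased exponent field, which occupies bits 62:49.
//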

#define SIGNMASK32 0x80000000
#define BID64_SIG_MAX 0x002386F26FC0ffffull
#define SIGNMASK64 0x8000000000000000ull

// typedef unsigned int FPSC; // floating-point status and control
	// bit15 - bit31: (unassigned)
	// bit14: RC:2
	// bit13: RC:1
	// bit12: RC:0
	// bit11: PM
	// bit10: UM
	// bit9:  OM
	// bit8:  ZM
	// bit7:  DM
	// bit6:  IM
	// bit5:  PE
	// bit4:  UE
	// bit3:  OE
	// bit2:  ZE
	// bit1:  DE
	// bit0:  IE

#define ROUNDING_MODE_MASK 0x00007000	// RC bits 12-14 of the FPSC

typedef struct _DEC_DIGITS {
  unsigned int digits;
  UINT64 threshold_hi;
  UINT64 threshold_lo;
  unsigned int digits1;
} DEC_DIGITS;

extern DEC_DIGITS nr_digits[];
extern UINT64 midpoint64[];
extern UINT128 midpoint128[];
extern UINT192 midpoint192[];
extern UINT256 midpoint256[];
extern UINT64 ten2k64[];
extern UINT128 ten2k128[];
extern UINT256 ten2k256[];
extern UINT128 ten2mk128[];
extern UINT64 ten2mk64[];
extern UINT128 ten2mk128trunc[];
extern int shiftright128[];
extern UINT64 maskhigh128[];
extern UINT64 maskhigh128M[];
extern UINT64 maskhigh192M[];
extern UINT64 maskhigh256M[];
extern UINT64 onehalf128[];
extern UINT64 onehalf128M[];
extern UINT64 onehalf192M[];
extern UINT64 onehalf256M[];
extern UINT128 ten2mk128M[];
extern UINT128 ten2mk128truncM[];
extern UINT192 ten2mk192truncM[];
extern UINT256 ten2mk256truncM[];
extern int shiftright128M[];
extern int shiftright192M[];
extern int shiftright256M[];
extern UINT192 ten2mk192M[];
extern UINT256 ten2mk256M[];
extern unsigned char char_table2[];
extern unsigned char char_table3[];

extern UINT64 ten2m3k64[];
extern unsigned int shift_ten2m3k64[];
extern UINT128 ten2m3k128[];
extern unsigned int shift_ten2m3k128[];



/***************************************************************************
 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
 ***************************************************************************/

extern UINT64 Kx64[];
extern unsigned int Ex64m64[];
extern UINT64 half64[];
extern UINT64 mask64[];
extern UINT64 ten2mxtrunc64[];

extern UINT128 Kx128[];
extern unsigned int Ex128m128[];
extern UINT64 half128[];
extern UINT64 mask128[];
extern UINT128 ten2mxtrunc128[];

extern UINT192 Kx192[];
extern unsigned int Ex192m192[];
extern UINT64 half192[];
extern UINT64 mask192[];
extern UINT192 ten2mxtrunc192[];

extern UINT256 Kx256[];
extern unsigned int Ex256m256[];
extern UINT64 half256[];
extern UINT64 mask256[];
extern UINT256 ten2mxtrunc256[];

typedef union __bid64_128 {
  UINT64 b64;
  UINT128 b128;
} BID64_128;

BID64_128 bid_fma (unsigned int P0,
		   BID64_128 x1, unsigned int P1,
		   BID64_128 y1, unsigned int P2,
		   BID64_128 z1, unsigned int P3,
		   unsigned int rnd_mode, FPSC * fpsc);

#define P16 16
#define P34 34

union __int_double {
  UINT64 i;
  double d;
};
typedef union __int_double int_double;


union __int_float {
  UINT32 i;
  float d;
};
typedef union __int_float int_float;

#define SWAP(A,B,T) {\
  T = A; \
  A = B; \
  B = T; \
}

// this macro will find coefficient_x to be in [2^A, 2^(A+1) ),
// i.e. its most significant set bit is bit A
#define NUMBITS(A, coefficient_x, tempx){\
  tempx.d = (float) coefficient_x;\
  A = ((tempx.i >> 23) & EXPONENT_MASK32) - 0x7f;\
}
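
//
// NUMBITS reads the position of the leading set bit off the IEEE binary32
// exponent field: the float conversion normalizes the value to 1.m * 2^A,
// so bits 30:23 hold A + 0x7f.  Example (hypothetical locals), valid when
// the conversion cannot round up past a power of two:
//
//   int A;
//   int_float tempx;
//   NUMBITS (A, 1000000, tempx);	// 2^19 <= 1000000 < 2^20, so A == 19
//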

enum class_types {
  signalingNaN,
  quietNaN,
  negativeInfinity,
  negativeNormal,
  negativeSubnormal,
  negativeZero,
  positiveZero,
  positiveSubnormal,
  positiveNormal,
  positiveInfinity
};

typedef union {
  UINT64 ui64;
  double d;
} BID_UI64DOUBLE;

#endif