/* Copyright (C) 2007-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
23
24#ifndef __BIDECIMAL_H
25#define __BIDECIMAL_H
26
27#include "bid_conf.h"
28#include "bid_functions.h"
29
200359e8
L
30#define __BID_INLINE__ static __inline
31
32/*********************************************************************
33 *
34 * Logical Shift Macros
35 *
36 *********************************************************************/
37
38#define __shr_128(Q, A, k) \
39{ \
40 (Q).w[0] = (A).w[0] >> k; \
41 (Q).w[0] |= (A).w[1] << (64-k); \
42 (Q).w[1] = (A).w[1] >> k; \
43}
44
45#define __shr_128_long(Q, A, k) \
46{ \
47 if((k)<64) { \
48 (Q).w[0] = (A).w[0] >> k; \
49 (Q).w[0] |= (A).w[1] << (64-k); \
50 (Q).w[1] = (A).w[1] >> k; \
51 } \
52 else { \
53 (Q).w[0] = (A).w[1]>>((k)-64); \
54 (Q).w[1] = 0; \
55 } \
56}
57
58#define __shl_128_long(Q, A, k) \
59{ \
60 if((k)<64) { \
61 (Q).w[1] = (A).w[1] << k; \
62 (Q).w[1] |= (A).w[0] >> (64-k); \
63 (Q).w[0] = (A).w[0] << k; \
64 } \
65 else { \
66 (Q).w[1] = (A).w[0]<<((k)-64); \
67 (Q).w[0] = 0; \
68 } \
69}
70
71#define __low_64(Q) (Q).w[0]
/*********************************************************************
 *
 *      String Macros
 *
 *********************************************************************/
// ASCII lower-casing: maps 'A'..'Z' to 'a'..'z', leaves every other
// character unchanged.  The unsigned-char cast of (x)-'A' turns the
// two range checks into a single comparison.
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
/*********************************************************************
 *
 *      Compare Macros
 *
 *********************************************************************/
// greater than
//  return 0 if A<=B
//  non-zero if A>B
#define __unsigned_compare_gt_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
// greater-or-equal
#define __unsigned_compare_ge_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
// 128-bit equality
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten exponent range: bp starts as the estimate bin_expon of the
// binary exponent of the 128-bit value P, and is incremented when
// P >= 2^(bp+1) (i.e. the estimate was one too small).  The three
// branches avoid undefined 64-bit shift counts at bp == 63 and 64.
#define __tight_bin_range_128(bp, P, bin_expon)      \
{                                                    \
UINT64 M;                                            \
    M = 1;                                           \
    (bp) = (bin_expon);                              \
    if((bp)<63) {                                    \
      M <<= ((bp)+1);                                \
      if((P).w[0] >= M) (bp)++; }                    \
    else if((bp)>64) {                               \
      M <<= ((bp)+1-64);                             \
      if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))   \
         (bp)++; }                                   \
    else if((P).w[1]) (bp)++;                        \
}
/*********************************************************************
 *
 *      Add/Subtract Macros
 *
 *********************************************************************/
// add 64-bit value to 128-bit (carry propagated into the high word)
#define __add_128_64(R128, A128, B64)       \
{                                           \
UINT64 R64H;                                \
    R64H = (A128).w[1];                     \
    (R128).w[0] = (B64) + (A128).w[0];      \
    if((R128).w[0] < (B64))                 \
      R64H ++;                              \
    (R128).w[1] = R64H;                     \
}
// subtract 64-bit value from 128-bit (borrow from the high word)
#define __sub_128_64(R128, A128, B64)       \
{                                           \
UINT64 R64H;                                \
    R64H = (A128).w[1];                     \
    if((A128).w[0] < (B64))                 \
      R64H --;                              \
    (R128).w[1] = R64H;                     \
    (R128).w[0] = (A128).w[0] - (B64);      \
}
// add 128-bit value to 128-bit
// assume no carry-out of the high word
#define __add_128_128(R128, A128, B128)     \
{                                           \
UINT128 Q128;                               \
    Q128.w[1] = (A128).w[1]+(B128).w[1];    \
    Q128.w[0] = (B128).w[0] + (A128).w[0];  \
    if(Q128.w[0] < (B128).w[0])             \
      Q128.w[1] ++;                         \
    (R128).w[1] = Q128.w[1];                \
    (R128).w[0] = Q128.w[0];                \
}
// subtract 128-bit value from 128-bit (result may alias an operand)
#define __sub_128_128(R128, A128, B128)     \
{                                           \
UINT128 Q128;                               \
    Q128.w[1] = (A128).w[1]-(B128).w[1];    \
    Q128.w[0] = (A128).w[0] - (B128).w[0];  \
    if((A128).w[0] < (B128).w[0])           \
      Q128.w[1] --;                         \
    (R128).w[1] = Q128.w[1];                \
    (R128).w[0] = Q128.w[0];                \
}
// S = X + Y, CY = carry out (0 or 1)
#define __add_carry_out(S, CY, X, Y)        \
{                                           \
UINT64 X1=X;                                \
    S = X + Y;                              \
    CY = (S<X1) ? 1 : 0;                    \
}
// S = X + Y + CI, CY = carry out (CI must be 0 or 1)
#define __add_carry_in_out(S, CY, X, Y, CI) \
{                                           \
UINT64 X1;                                  \
    X1 = X + CI;                            \
    S = X1 + Y;                             \
    CY = ((S<X1) || (X1<CI)) ? 1 : 0;       \
}
// S = X - Y, CY = borrow out (0 or 1)
#define __sub_borrow_out(S, CY, X, Y)       \
{                                           \
UINT64 X1=X;                                \
    S = X - Y;                              \
    CY = (S>X1) ? 1 : 0;                    \
}
// S = X - Y - CI, CY = borrow out (CI must be 0 or 1)
#define __sub_borrow_in_out(S, CY, X, Y, CI) \
{                                           \
UINT64 X1, X0=X;                            \
    X1 = X - CI;                            \
    S = X1 - Y;                             \
    CY = ((S>X1) || (X1>X0)) ? 1 : 0;       \
}
// increment C128 and check for rounding overflow:
// if  (C_128) == 10^34  then  (C_128) = 10^33  and increment the exponent
#define INCREMENT(C_128, exp)                      \
{                                                  \
  C_128.w[0]++;                                    \
  if (C_128.w[0] == 0) C_128.w[1]++;               \
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&       \
      C_128.w[0] == 0x378d8e6400000000ull) {       \
    exp++;                                         \
    C_128.w[1] = 0x0000314dc6448d93ull;            \
    C_128.w[0] = 0x38c15b0a00000000ull;            \
  }                                                \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp)                      \
{                                                  \
  C_128.w[0]--;                                    \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
  if (C_128.w[1] == 0x0000314dc6448d93ull &&       \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {  \
    exp--;                                         \
    C_128.w[1] = 0x0001ed09bead87c0ull;            \
    C_128.w[0] = 0x378d8e63ffffffffull;            \
  }                                                \
}
b2a00c89 207
200359e8
L
208 /*********************************************************************
209 *
210 * Multiply Macros
211 *
212 *********************************************************************/
213#define __mul_64x64_to_64(P64, CX, CY) (P64) = (CX) * (CY)
214/***************************************
215 * Signed, Full 64x64-bit Multiply
216 ***************************************/
217#define __imul_64x64_to_128(P, CX, CY) \
218{ \
219UINT64 SX, SY; \
220 __mul_64x64_to_128(P, CX, CY); \
221 \
222 SX = ((SINT64)(CX))>>63; \
223 SY = ((SINT64)(CY))>>63; \
224 SX &= CY; SY &= CX; \
225 \
226 (P).w[1] = (P).w[1] - SX - SY; \
227}
228/***************************************
229 * Signed, Full 64x128-bit Multiply
230 ***************************************/
231#define __imul_64x128_full(Ph, Ql, A, B) \
232{ \
233UINT128 ALBL, ALBH, QM2, QM; \
234 \
235 __imul_64x64_to_128(ALBH, (A), (B).w[1]); \
236 __imul_64x64_to_128(ALBL, (A), (B).w[0]); \
237 \
238 (Ql).w[0] = ALBL.w[0]; \
239 QM.w[0] = ALBL.w[1]; \
240 QM.w[1] = ((SINT64)ALBL.w[1])>>63; \
241 __add_128_128(QM2, ALBH, QM); \
242 (Ql).w[1] = QM2.w[0]; \
243 Ph = QM2.w[1]; \
244}
/*****************************************************
 *      Unsigned Multiply Macros
 *****************************************************/
// get full 64x64bit product (split into 32-bit halves, accumulate the
// four partial products)
//
#define __mul_64x64_to_128(P, CX, CY)   \
{                                       \
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;  \
    CXH = (CX) >> 32;                   \
    CXL = (UINT32)(CX);                 \
    CYH = (CY) >> 32;                   \
    CYL = (UINT32)(CY);                 \
                                        \
    PM = CXH*CYL;                       \
    PH = CXH*CYH;                       \
    PL = CXL*CYL;                       \
    PM2 = CXL*CYH;                      \
    PH += (PM>>32);                     \
    PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
                                        \
    (P).w[1] = PH + (PM>>32);           \
    (P).w[0] = (PM<<32)+(UINT32)PL;     \
}
// get full 64x64bit product
// Note:
// This macro is used for CX < 2^61, CY < 2^61
// (the middle-word sum then cannot overflow, so one partial-product
// addition is saved)
//
#define __mul_64x64_to_128_fast(P, CX, CY)  \
{                                           \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;      \
    CXH = (CX) >> 32;                       \
    CXL = (UINT32)(CX);                     \
    CYH = (CY) >> 32;                       \
    CYL = (UINT32)(CY);                     \
                                            \
    PM = CXH*CYL;                           \
    PL = CXL*CYL;                           \
    PH = CXH*CYH;                           \
    PM += CXL*CYH;                          \
    PM += (PL>>32);                         \
                                            \
    (P).w[1] = PH + (PM>>32);               \
    (P).w[0] = (PM<<32)+(UINT32)PL;         \
}
// full square; used for CX< 2^60
#define __sqr64_fast(P, CX)             \
{                                       \
UINT64 CXH, CXL, PL, PH, PM;            \
    CXH = (CX) >> 32;                   \
    CXL = (UINT32)(CX);                 \
                                        \
    PM = CXH*CXL;                       \
    PL = CXL*CXL;                       \
    PH = CXH*CXH;                       \
    PM += PM;                           \
    PM += (PL>>32);                     \
                                        \
    (P).w[1] = PH + (PM>>32);           \
    (P).w[0] = (PM<<32)+(UINT32)PL;     \
}
// high 64 bits of a 64x64bit product
// Note:
// This implementation is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_64_high_fast(P, CX, CY)  \
{                                               \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;          \
    CXH = (CX) >> 32;                           \
    CXL = (UINT32)(CX);                         \
    CYH = (CY) >> 32;                           \
    CYL = (UINT32)(CY);                         \
                                                \
    PM = CXH*CYL;                               \
    PL = CXL*CYL;                               \
    PH = CXH*CYH;                               \
    PM += CXL*CYH;                              \
    PM += (PL>>32);                             \
                                                \
    (P) = PH + (PM>>32);                        \
}
// get full 64x64bit product (no operand-range restriction)
//
#define __mul_64x64_to_128_full(P, CX, CY)  \
{                                           \
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;      \
    CXH = (CX) >> 32;                       \
    CXL = (UINT32)(CX);                     \
    CYH = (CY) >> 32;                       \
    CYL = (UINT32)(CY);                     \
                                            \
    PM = CXH*CYL;                           \
    PH = CXH*CYH;                           \
    PL = CXL*CYL;                           \
    PM2 = CXL*CYH;                          \
    PH += (PM>>32);                         \
    PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
                                            \
    (P).w[1] = PH + (PM>>32);               \
    (P).w[0] = (PM<<32)+(UINT32)PL;         \
}
// high 128 bits of a 128x128-bit product
#define __mul_128x128_high(Q, A, B)                 \
{                                                   \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;            \
                                                    \
    __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);   \
    __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);   \
    __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
    __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);    \
                                                    \
    __add_128_128(QM, ALBH, AHBL);                  \
    __add_128_64(QM2, QM, ALBL.w[1]);               \
    __add_128_64((Q), AHBH, QM2.w[1]);              \
}
// full 256-bit product as high (Qh) and low (Ql) 128-bit halves
#define __mul_128x128_full(Qh, Ql, A, B)            \
{                                                   \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;            \
                                                    \
    __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);   \
    __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);   \
    __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
    __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);    \
                                                    \
    __add_128_128(QM, ALBH, AHBL);                  \
    (Ql).w[0] = ALBL.w[0];                          \
    __add_128_64(QM2, QM, ALBL.w[1]);               \
    __add_128_64((Qh), AHBH, QM2.w[1]);             \
    (Ql).w[1] = QM2.w[0];                           \
}
// low 128 bits of a 128x128-bit product
#define __mul_128x128_low(Ql, A, B)                 \
{                                                   \
UINT128 ALBL;                                       \
UINT64 QM64;                                        \
                                                    \
    __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
    QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1];   \
                                                    \
    (Ql).w[0] = ALBL.w[0];                          \
    (Ql).w[1] = QM64 + ALBL.w[1];                   \
}
// low 128 bits of a 64x128-bit product
#define __mul_64x128_low(Ql, A, B)                  \
{                                                   \
  UINT128 ALBL, ALBH, QM2;                          \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);          \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);          \
  (Ql).w[0] = ALBL.w[0];                            \
  __add_128_64(QM2, ALBH, ALBL.w[1]);               \
  (Ql).w[1] = QM2.w[0];                             \
}
// full 192-bit product of 64x128 as high word (Ph) and low 128 (Ql)
#define __mul_64x128_full(Ph, Ql, A, B)             \
{                                                   \
UINT128 ALBL, ALBH, QM2;                            \
                                                    \
    __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
    __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                    \
    (Ql).w[0] = ALBL.w[0];                          \
    __add_128_64(QM2, ALBH, ALBL.w[1]);             \
    (Ql).w[1] = QM2.w[0];                           \
    Ph = QM2.w[1];                                  \
}
// full 192-bit product of 64x128 into a 3-word result
#define __mul_64x128_to_192(Q, A, B)                \
{                                                   \
UINT128 ALBL, ALBH, QM2;                            \
                                                    \
    __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
    __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                    \
    (Q).w[0] = ALBL.w[0];                           \
    __add_128_64(QM2, ALBH, ALBL.w[1]);             \
    (Q).w[1] = QM2.w[0];                            \
    (Q).w[2] = QM2.w[1];                            \
}
// NOTE(review): identical to __mul_64x128_to_192 above; both spellings
// are kept because callers elsewhere may use either name.
#define __mul_64x128_to192(Q, A, B)                 \
{                                                   \
UINT128 ALBL, ALBH, QM2;                            \
                                                    \
    __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
    __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                    \
    (Q).w[0] = ALBL.w[0];                           \
    __add_128_64(QM2, ALBH, ALBL.w[1]);             \
    (Q).w[1] = QM2.w[0];                            \
    (Q).w[2] = QM2.w[1];                            \
}
// full 256-bit product of two 128-bit values
#define __mul_128x128_to_256(P256, A, B)                     \
{                                                            \
UINT128 Qll, Qlh;                                            \
UINT64 Phl, Phh, CY1, CY2;                                   \
                                                             \
    __mul_64x128_full(Phl, Qll, A.w[0], B);                  \
    __mul_64x128_full(Phh, Qlh, A.w[1], B);                  \
    (P256).w[0] = Qll.w[0];                                  \
    __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);    \
    __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
    (P256).w[3] = Phh + CY2;                                 \
}
//
// For better performance, will check A.w[1] against 0,
// but not B.w[1]
// Use this macro accordingly
#define __mul_128x128_to_256_check_A(P256, A, B)                 \
{                                                                \
UINT128 Qll, Qlh;                                                \
UINT64 Phl, Phh, CY1, CY2;                                       \
                                                                 \
    __mul_64x128_full(Phl, Qll, A.w[0], B);                      \
    (P256).w[0] = Qll.w[0];                                      \
    if(A.w[1]) {                                                 \
      __mul_64x128_full(Phh, Qlh, A.w[1], B);                    \
      __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
      __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);   \
      (P256).w[3] = Phh + CY2; }                                 \
    else {                                                       \
      (P256).w[1] = Qll.w[1];                                    \
      (P256).w[2] = Phl;                                         \
      (P256).w[3] = 0; }                                         \
}
// 64x192 -> 256-bit product
#define __mul_64x192_to_256(lP, lA, lB)                      \
{                                                            \
UINT128 lP0,lP1,lP2;                                         \
UINT64 lC;                                                   \
    __mul_64x64_to_128(lP0, lA, (lB).w[0]);                  \
    __mul_64x64_to_128(lP1, lA, (lB).w[1]);                  \
    __mul_64x64_to_128(lP2, lA, (lB).w[2]);                  \
    (lP).w[0] = lP0.w[0];                                    \
    __add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]);         \
    __add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC);   \
    (lP).w[3] = lP2.w[1] + lC;                               \
}
// 64x256 -> 320-bit product
#define __mul_64x256_to_320(P, A, B)                         \
{                                                            \
UINT128 lP0,lP1,lP2,lP3;                                     \
UINT64 lC;                                                   \
    __mul_64x64_to_128(lP0, A, (B).w[0]);                    \
    __mul_64x64_to_128(lP1, A, (B).w[1]);                    \
    __mul_64x64_to_128(lP2, A, (B).w[2]);                    \
    __mul_64x64_to_128(lP3, A, (B).w[3]);                    \
    (P).w[0] = lP0.w[0];                                     \
    __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);          \
    __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC);    \
    __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC);    \
    (P).w[4] = lP3.w[1] + lC;                                \
}
// 192x192 -> 384-bit product, accumulated one 64-bit slice at a time
#define __mul_192x192_to_384(P, A, B)                        \
{                                                            \
UINT256 P0,P1,P2;                                            \
UINT64 CY;                                                   \
    __mul_64x192_to_256(P0, (A).w[0], B);                    \
    __mul_64x192_to_256(P1, (A).w[1], B);                    \
    __mul_64x192_to_256(P2, (A).w[2], B);                    \
    (P).w[0] = P0.w[0];                                      \
    __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);            \
    __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);      \
    __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);      \
    (P).w[4] = P1.w[3] + CY;                                 \
    __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);           \
    __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);     \
    __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);     \
    (P).w[5] = P2.w[3] + CY;                                 \
}
// 64x320 -> 384-bit product
#define __mul_64x320_to_384(P, A, B)                         \
{                                                            \
UINT128 lP0,lP1,lP2,lP3,lP4;                                 \
UINT64 lC;                                                   \
    __mul_64x64_to_128(lP0, A, (B).w[0]);                    \
    __mul_64x64_to_128(lP1, A, (B).w[1]);                    \
    __mul_64x64_to_128(lP2, A, (B).w[2]);                    \
    __mul_64x64_to_128(lP3, A, (B).w[3]);                    \
    __mul_64x64_to_128(lP4, A, (B).w[4]);                    \
    (P).w[0] = lP0.w[0];                                     \
    __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);          \
    __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC);    \
    __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC);    \
    __add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC);    \
    (P).w[5] = lP4.w[1] + lC;                                \
}
// A*A
// Full 128x128-bit product, exploiting the symmetry of the cross terms
// (Qlh is doubled instead of computing A.w[1]*A.w[0] twice).
#define __sqr128_to_256(P256, A)                                    \
{                                                                   \
UINT128 Qll, Qlh, Qhh;                                              \
UINT64 TMP_C1, TMP_C2;                                              \
                                                                    \
    __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                        \
    __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                        \
    Qhh.w[1] += (Qlh.w[1]>>63);                                     \
    Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);                  \
    Qlh.w[0] += Qlh.w[0];                                           \
    __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                        \
                                                                    \
    __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]);        \
    (P256).w[0] = Qll.w[0];                                         \
    __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
    (P256).w[3] = Qhh.w[1]+TMP_C2;                                  \
}
// 128x128 -> 256, result delivered as two separate 128-bit halves
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B)        \
{                                                            \
UINT128 Qll, Qlh;                                            \
UINT64 Phl, Phh, C1, C2;                                     \
                                                             \
    __mul_64x128_full(Phl, Qll, A.w[0], B);                  \
    __mul_64x128_full(Phh, Qlh, A.w[1], B);                  \
    (PQl).w[0] = Qll.w[0];                                   \
    __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]);      \
    __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1);    \
    (PQh).w[1] = Phh + C2;                                   \
}
// 256x256 -> 512-bit product, accumulated one 64-bit slice at a time
#define __mul_256x256_to_512(P, A, B)                        \
{                                                            \
UINT512 P0,P1,P2,P3;                                         \
UINT64 CY;                                                   \
    __mul_64x256_to_320(P0, (A).w[0], B);                    \
    __mul_64x256_to_320(P1, (A).w[1], B);                    \
    __mul_64x256_to_320(P2, (A).w[2], B);                    \
    __mul_64x256_to_320(P3, (A).w[3], B);                    \
    (P).w[0] = P0.w[0];                                      \
    __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);            \
    __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);      \
    __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);      \
    __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY);      \
    (P).w[5] = P1.w[4] + CY;                                 \
    __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);           \
    __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);     \
    __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);     \
    __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);     \
    (P).w[6] = P2.w[4] + CY;                                 \
    __add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]);           \
    __add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY);     \
    __add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY);     \
    __add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY);     \
    (P).w[7] = P3.w[4] + CY;                                 \
}
// 192x256 -> 448-bit product
#define __mul_192x256_to_448(P, A, B)                        \
{                                                            \
UINT512 P0,P1,P2;                                            \
UINT64 CY;                                                   \
    __mul_64x256_to_320(P0, (A).w[0], B);                    \
    __mul_64x256_to_320(P1, (A).w[1], B);                    \
    __mul_64x256_to_320(P2, (A).w[2], B);                    \
    (P).w[0] = P0.w[0];                                      \
    __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);            \
    __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);      \
    __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);      \
    __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY);      \
    (P).w[5] = P1.w[4] + CY;                                 \
    __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);           \
    __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);     \
    __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);     \
    __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);     \
    (P).w[6] = P2.w[4] + CY;                                 \
}
// 320x320 -> 640-bit product, built from the 256x256 kernel plus the
// cross terms involving each operand's fifth word
#define __mul_320x320_to_640(P, A, B)                        \
{                                                            \
UINT512 P0,P1,P2,P3;                                         \
UINT64 CY;                                                   \
    __mul_256x256_to_512((P), (A), B);                       \
    __mul_64x256_to_320(P1, (A).w[4], B);                    \
    __mul_64x256_to_320(P2, (B).w[4], A);                    \
    __mul_64x64_to_128(P3, (A).w[4], (B).w[4]);              \
    __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);           \
    __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY);     \
    __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY);     \
    __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY);     \
    __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY);     \
    P3.w[1] += CY;                                           \
    __add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]);           \
    __add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY);     \
    __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY);     \
    __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY);     \
    __add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY);      \
    (P).w[9] = P3.w[1] + CY;                                 \
}
// 384x384 -> 768-bit product, built from the 320x320 kernel plus the
// cross terms involving each operand's sixth word
#define __mul_384x384_to_768(P, A, B)                        \
{                                                            \
UINT512 P0,P1,P2,P3;                                         \
UINT64 CY;                                                   \
    __mul_320x320_to_640((P), (A), B);                       \
    __mul_64x320_to_384(P1, (A).w[5], B);                    \
    __mul_64x320_to_384(P2, (B).w[5], A);                    \
    __mul_64x64_to_128(P3, (A).w[5], (B).w[5]);              \
    __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);           \
    __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY);     \
    __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY);     \
    __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY);     \
    __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY);     \
    __add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY);     \
    P3.w[1] += CY;                                           \
    __add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]);           \
    __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY);     \
    __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY);     \
    __add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY);     \
    __add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY);     \
    __add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY);     \
    (P).w[11] = P3.w[1] + CY;                                \
}
// low 128 bits of 64x128, with the high cross term folded in as 64 bits
#define __mul_64x128_short(Ql, A, B)                \
{                                                   \
UINT64 ALBH_L;                                      \
                                                    \
    __mul_64x64_to_64(ALBH_L, (A),(B).w[1]);        \
    __mul_64x64_to_128((Ql), (A), (B).w[0]);        \
                                                    \
    (Ql).w[1] += ALBH_L;                            \
}
// D = _TMP * 10, computed as _TMP*2 + _TMP*8
// NOTE(review): reads _TMP.w[] directly, so the argument must be a
// simple lvalue expression, not an arbitrary expression.
#define __scale128_10(D,_TMP)                           \
{                                                       \
UINT128 _TMP2,_TMP8;                                    \
    _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);        \
    _TMP2.w[0] = _TMP.w[0]<<1;                          \
    _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);        \
    _TMP8.w[0] = _TMP.w[0]<<3;                          \
    __add_128_128(D, _TMP2, _TMP8);                     \
}
// 64x64-bit product (self-contained variant; does not rely on the
// other multiply macros being defined)
#define __mul_64x64_to_128MACH(P128, CX64, CY64)    \
{                                                   \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;              \
  CXH = (CX64) >> 32;                               \
  CXL = (UINT32)(CX64);                             \
  CYH = (CY64) >> 32;                               \
  CYL = (UINT32)(CY64);                             \
  PM = CXH*CYL;                                     \
  PH = CXH*CYH;                                     \
  PL = CXL*CYL;                                     \
  PM2 = CXL*CYH;                                    \
  PH += (PM>>32);                                   \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);           \
  (P128).w[1] = PH + (PM>>32);                      \
  (P128).w[0] = (PM<<32)+(UINT32)PL;                \
}
// high 64 bits of a 64x64-bit product
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)     \
{                                                   \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;              \
  CXH = (CX64) >> 32;                               \
  CXL = (UINT32)(CX64);                             \
  CYH = (CY64) >> 32;                               \
  CYL = (UINT32)(CY64);                             \
  PM = CXH*CYL;                                     \
  PH = CXH*CYH;                                     \
  PL = CXL*CYL;                                     \
  PM2 = CXL*CYH;                                    \
  PH += (PM>>32);                                   \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);           \
  P64 = PH + (PM>>32);                              \
}
// low 128 bits of a 128x64-bit product
#define __mul_128x64_to_128(Q128, A64, B128)            \
{                                                       \
  UINT64 ALBH_L;                                        \
  ALBH_L = (A64) * (B128).w[1];                         \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]);   \
  (Q128).w[1] += ALBH_L;                                \
}
// low 128 bits of a 64x128-bit product
// might simplify by calculating just QM2.w[0]
#define __mul_64x128_to_128(Ql, A, B)               \
{                                                   \
  UINT128 ALBL, ALBH, QM2;                          \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);          \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);          \
  (Ql).w[0] = ALBL.w[0];                            \
  __add_128_64(QM2, ALBH, ALBL.w[1]);               \
  (Ql).w[1] = QM2.w[0];                             \
}
/*********************************************************************
 *
 *      BID Pack/Unpack Macros
 *
 *********************************************************************/
/////////////////////////////////////////
// BID64 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64  767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS     16
/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128  12287
#define DECIMAL_EXPONENT_BIAS_128  6176
#define MAX_FORMAT_DIGITS_128  34
/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32  191
#define DECIMAL_EXPONENT_BIAS_32  101
#define MAX_FORMAT_DIGITS_32  7
////////////////////////////////////////
//  Constant Definitions
///////////////////////////////////////
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64         0x7800000000000000ull
#define SINFINITY_MASK64        0xf800000000000000ull
#define SSNAN_MASK64            0xfc00000000000000ull
#define NAN_MASK64              0x7c00000000000000ull
#define SNAN_MASK64             0x7e00000000000000ull
#define QUIET_MASK64            0xfdffffffffffffffull
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
#define EXPONENT_MASK64         0x3ff
#define EXPONENT_SHIFT_LARGE64  51
#define EXPONENT_SHIFT_SMALL64  53
#define LARGEST_BID64           0x77fb86f26fc0ffffull
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
#define EXPONENT_MASK128        0x3fff
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32         0x78000000ul
#define LARGE_COEFF_MASK32      0x007ffffful
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
#define SMALL_COEFF_MASK32      0x001ffffful
#define EXPONENT_MASK32         0xff
#define LARGEST_BID32           0x77f8967f
#define NAN_MASK32              0x7c000000
#define SNAN_MASK32             0x7e000000
#define MASK_BINARY_EXPONENT    0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS    0x3ff
#define UPPER_EXPON_LIMIT       51
766// data needed for BID pack/unpack macros
b2a00c89
L
767extern UINT64 round_const_table[][19];
768extern UINT128 reciprocals10_128[];
769extern int recip_scale[];
770extern UINT128 power10_table_128[];
771extern int estimate_decimal_digits[];
772extern int estimate_bin_expon[];
773extern UINT64 power10_index_binexp[];
774extern int short_recip_scale[];
775extern UINT64 reciprocals10_64[];
776extern UINT128 power10_index_binexp_128[];
777extern UINT128 round_const_table_128[][36];
200359e8
L
778
779
//////////////////////////////////////////////
//  Status Flag Handling
/////////////////////////////////////////////
// OR the given exception bits into the status word *fpsc
#define __set_status_flags(fpsc, status)  *(fpsc) |= status
// non-zero if the inexact flag is already set in *fpsc
#define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)
785
786__BID_INLINE__ UINT64
787unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
788 UINT64 * pcoefficient_x, UINT64 x) {
789 UINT64 tmp, coeff;
790
791 *psign_x = x & 0x8000000000000000ull;
792
793 if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
794 // special encodings
795 // coefficient
796 coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;
797
798 if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
b2a00c89
L
799 *pexponent_x = 0;
800 *pcoefficient_x = x & 0xfe03ffffffffffffull;
801 if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
802 *pcoefficient_x = x & 0xfe00000000000000ull;
803 if ((x & NAN_MASK64) == INFINITY_MASK64)
804 *pcoefficient_x = x & SINFINITY_MASK64;
200359e8
L
805 return 0; // NaN or Infinity
806 }
807 // check for non-canonical values
808 if (coeff >= 10000000000000000ull)
809 coeff = 0;
810 *pcoefficient_x = coeff;
811 // get exponent
812 tmp = x >> EXPONENT_SHIFT_LARGE64;
813 *pexponent_x = (int) (tmp & EXPONENT_MASK64);
b2a00c89 814 return coeff;
200359e8
L
815 }
816 // exponent
817 tmp = x >> EXPONENT_SHIFT_SMALL64;
818 *pexponent_x = (int) (tmp & EXPONENT_MASK64);
819 // coefficient
820 *pcoefficient_x = (x & SMALL_COEFF_MASK64);
821
822 return *pcoefficient_x;
823}
824
825//
826// BID64 pack macro (general form)
827//
828__BID_INLINE__ UINT64
829get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
830 unsigned *fpsc) {
831 UINT128 Stemp, Q_low;
832 UINT64 QH, r, mask, C64, remainder_h, CY, carry;
833 int extra_digits, amount, amount2;
834 unsigned status;
835
836 if (coeff > 9999999999999999ull) {
837 expon++;
838 coeff = 1000000000000000ull;
839 }
840 // check for possible underflow/overflow
841 if (((unsigned) expon) >= 3 * 256) {
842 if (expon < 0) {
843 // underflow
844 if (expon + MAX_FORMAT_DIGITS < 0) {
845#ifdef SET_STATUS_FLAGS
846 __set_status_flags (fpsc,
847 UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
848#endif
849#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
850#ifndef IEEE_ROUND_NEAREST
851 if (rmode == ROUNDING_DOWN && sgn)
852 return 0x8000000000000001ull;
853 if (rmode == ROUNDING_UP && !sgn)
854 return 1ull;
855#endif
856#endif
857 // result is 0
858 return sgn;
859 }
860#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
861#ifndef IEEE_ROUND_NEAREST
862 if (sgn && (unsigned) (rmode - 1) < 2)
863 rmode = 3 - rmode;
864#endif
865#endif
866 // get digits to be shifted out
867 extra_digits = -expon;
b2a00c89 868 coeff += round_const_table[rmode][extra_digits];
200359e8
L
869
870 // get coeff*(2^M[extra_digits])/10^extra_digits
871 __mul_64x128_full (QH, Q_low, coeff,
b2a00c89 872 reciprocals10_128[extra_digits]);
200359e8
L
873
874 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
b2a00c89 875 amount = recip_scale[extra_digits];
200359e8
L
876
877 C64 = QH >> amount;
878
879#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
880#ifndef IEEE_ROUND_NEAREST
b2a00c89 881 if (rmode == 0) //ROUNDING_TO_NEAREST
200359e8
L
882#endif
883 if (C64 & 1) {
884 // check whether fractional part of initial_P/10^extra_digits is exactly .5
885
886 // get remainder
887 amount2 = 64 - amount;
888 remainder_h = 0;
889 remainder_h--;
890 remainder_h >>= amount2;
891 remainder_h = remainder_h & QH;
892
893 if (!remainder_h
b2a00c89
L
894 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
895 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
200359e8 896 && Q_low.w[0] <
b2a00c89 897 reciprocals10_128[extra_digits].w[0]))) {
200359e8
L
898 C64--;
899 }
900 }
901#endif
902
903#ifdef SET_STATUS_FLAGS
904
905 if (is_inexact (fpsc))
906 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
907 else {
908 status = INEXACT_EXCEPTION;
909 // get remainder
910 remainder_h = QH << (64 - amount);
911
912 switch (rmode) {
913 case ROUNDING_TO_NEAREST:
914 case ROUNDING_TIES_AWAY:
915 // test whether fractional part is 0
916 if (remainder_h == 0x8000000000000000ull
b2a00c89
L
917 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
918 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
200359e8 919 && Q_low.w[0] <
b2a00c89 920 reciprocals10_128[extra_digits].w[0])))
200359e8
L
921 status = EXACT_STATUS;
922 break;
923 case ROUNDING_DOWN:
924 case ROUNDING_TO_ZERO:
925 if (!remainder_h
b2a00c89
L
926 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
927 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
200359e8 928 && Q_low.w[0] <
b2a00c89 929 reciprocals10_128[extra_digits].w[0])))
200359e8
L
930 status = EXACT_STATUS;
931 break;
932 default:
933 // round up
934 __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
b2a00c89 935 reciprocals10_128[extra_digits].w[0]);
200359e8 936 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
b2a00c89 937 reciprocals10_128[extra_digits].w[1], CY);
200359e8
L
938 if ((remainder_h >> (64 - amount)) + carry >=
939 (((UINT64) 1) << amount))
940 status = EXACT_STATUS;
941 }
942
943 if (status != EXACT_STATUS)
944 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
945 }
946
947#endif
948
949 return sgn | C64;
950 }
951 while (coeff < 1000000000000000ull && expon >= 3 * 256) {
952 expon--;
953 coeff = (coeff << 3) + (coeff << 1);
954 }
955 if (expon > DECIMAL_MAX_EXPON_64) {
956#ifdef SET_STATUS_FLAGS
957 __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
958#endif
959 // overflow
960 r = sgn | INFINITY_MASK64;
961 switch (rmode) {
962 case ROUNDING_DOWN:
963 if (!sgn)
964 r = LARGEST_BID64;
965 break;
966 case ROUNDING_TO_ZERO:
967 r = sgn | LARGEST_BID64;
968 break;
969 case ROUNDING_UP:
970 // round up
971 if (sgn)
972 r = SMALLEST_BID64;
973 }
974 return r;
975 }
976 }
977
978 mask = 1;
979 mask <<= EXPONENT_SHIFT_SMALL64;
980
981 // check whether coefficient fits in 10*5+3 bits
982 if (coeff < mask) {
983 r = expon;
984 r <<= EXPONENT_SHIFT_SMALL64;
985 r |= (coeff | sgn);
986 return r;
987 }
988 // special format
989
990 // eliminate the case coeff==10^16 after rounding
991 if (coeff == 10000000000000000ull) {
992 r = expon + 1;
993 r <<= EXPONENT_SHIFT_SMALL64;
994 r |= (1000000000000000ull | sgn);
995 return r;
996 }
997
998 r = expon;
999 r <<= EXPONENT_SHIFT_LARGE64;
1000 r |= (sgn | SPECIAL_ENCODING_MASK64);
1001 // add coeff, without leading bits
1002 mask = (mask >> 2) - 1;
1003 coeff &= mask;
1004 r |= coeff;
1005
1006 return r;
1007}
1008
1009
1010
1011
//
// No overflow/underflow checking
//
// Pack sign, biased exponent and coefficient into a BID64 value.
// sgn is expected to be the sign bit already positioned at bit 63
// (it is OR-ed directly into the result).  Callers must guarantee
// expon is already in range - no range checks are performed here.
__BID_INLINE__ UINT64
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    // small-coefficient format: exponent field starts right above the
    // coefficient bits
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding:
  // re-express it as 10^15 with the exponent incremented
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  // large-coefficient encoding: steering bits set, exponent shifted
  // further left, and only the low coefficient bits stored (the top
  // bits are implicit in this format)
  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
1049
1050
//
// no underflow checking
//
// Pack a BID64 value, handling only the overflow side: if the exponent
// is at/above the top of the range (3*256 encodable exponent values),
// try to normalize by scaling the coefficient up by 10, and produce the
// rounding-mode-dependent overflow result if that fails.
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
			 unsigned *fpsc) {
  UINT64 r, mask;

  if (((unsigned) expon) >= 3 * 256 - 1) {
    // a rounded-up coefficient of 10^16 at the last valid exponent is
    // re-expressed as 10^15 with the exponent pushed out of range, so it
    // goes through the normalization loop below
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      // multiply coefficient by 10 ((c<<3)+(c<<1)) while decrementing the
      // exponent, as long as the coefficient stays below 10^15
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
	expon--;
	coeff = (coeff << 3) + (coeff << 1);
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
	// overflow: default result is +/-Inf, corrected per rounding mode
	r = sgn | INFINITY_MASK64;
	switch (rmode) {
	case ROUNDING_DOWN:
	  // positive overflow rounds down to +largest finite
	  if (!sgn)
	    r = LARGEST_BID64;
	  break;
	case ROUNDING_TO_ZERO:
	  r = sgn | LARGEST_BID64;
	  break;
	case ROUNDING_UP:
	  // round up: negative overflow gives -largest finite
	  // (SMALLEST_BID64 is presumably the most negative value - TODO confirm)
	  if (sgn)
	    r = SMALLEST_BID64;
	}
	return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
1125
1126
1127//
1128// No overflow/underflow checking
1129// or checking for coefficients equal to 10^16 (after rounding)
1130//
1131__BID_INLINE__ UINT64
1132very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1133 UINT64 r, mask;
1134
1135 mask = 1;
1136 mask <<= EXPONENT_SHIFT_SMALL64;
1137
1138 // check whether coefficient fits in 10*5+3 bits
1139 if (coeff < mask) {
1140 r = expon;
1141 r <<= EXPONENT_SHIFT_SMALL64;
1142 r |= (coeff | sgn);
1143 return r;
1144 }
1145 // special format
1146 r = expon;
1147 r <<= EXPONENT_SHIFT_LARGE64;
1148 r |= (sgn | SPECIAL_ENCODING_MASK64);
1149 // add coeff, without leading bits
1150 mask = (mask >> 2) - 1;
1151 coeff &= mask;
1152 r |= coeff;
1153
1154 return r;
1155}
1156
1157//
1158// No overflow/underflow checking or checking for coefficients above 2^53
1159//
1160__BID_INLINE__ UINT64
1161very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
1162 // no UF/OF
1163 UINT64 r;
1164
1165 r = expon;
1166 r <<= EXPONENT_SHIFT_SMALL64;
1167 r |= (coeff | sgn);
1168 return r;
1169}
1170
1171
//
// This pack macro is used when underflow is known to occur
//
// Rounds the (coefficient, exponent) pair to the smallest representable
// scale by dividing the coefficient by 10^extra_digits (implemented as a
// multiply by a precomputed 128-bit reciprocal followed by a shift),
// applies the requested rounding mode, and sets underflow/inexact flags.
// R is a sticky bit: non-zero when digits were already lost upstream.
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
	      unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow: result is too small to be represented at all
  if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    // directed rounding away from zero yields the smallest denormal
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff  (gives one extra digit so the sticky bit R can be OR-ed in)
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  // for negative operands, swap ROUNDING_DOWN <-> ROUNDING_UP so the
  // rest of the code can treat the value as positive
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  if (R)
    coeff |= 1;
  // get digits to be shifted out
  extra_digits = 1 - expon;
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
		     reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is exactly .5
      // (round-half-even correction: if so, the rounding constant pushed the
      // quotient one too high, so take it back down to the even value)

      // get remainder
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;		// all-ones mask
      remainder_h >>= amount2;	// low `amount` bits set
      remainder_h = remainder_h & QH;

      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0]))) {
	C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    // decide whether the division by 10^extra_digits was exact; the
    // per-mode tests must undo the rounding constant added above
    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (remainder_h == 0x8000000000000000ull
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
		       reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			  reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
	  (((UINT64) 1) << amount))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | C64;

}
1296
1297
1298
//
// This pack macro does not check for coefficients above 2^53
//
// General BID64 packer for small mantissas: handles underflow (by the
// same reciprocal-multiply rounding as get_BID64_UF), overflow (by
// normalizing the coefficient upward, then producing the rounding-mode
// dependent overflow result), and both finite encodings.
__BID_INLINE__ UINT64
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
			  int rmode, unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	// directed rounding away from zero gives the smallest denormal
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // swap directed rounding modes for negative operands
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      C128.w[0] = coeff + round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, C128.w[0],
			 reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
	  // (round-half-even correction)

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;	// all-ones mask
	  remainder_h >>= amount2;	// low `amount` bits set
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0]))) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	// exactness test per rounding mode (must undo the rounding constant)
	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }

    // exponent too large: try to normalize by scaling the coefficient up
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);	// coeff *= 10
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow: default +/-Inf, corrected per rounding mode
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    } else {
      // normalization may have grown the coefficient into the
      // large-coefficient range
      mask = 1;
      mask <<= EXPONENT_SHIFT_SMALL64;
      if (coeff >= mask) {
	r = expon;
	r <<= EXPONENT_SHIFT_LARGE64;
	r |= (sgn | SPECIAL_ENCODING_MASK64);
	// add coeff, without leading bits
	mask = (mask >> 2) - 1;
	coeff &= mask;
	r |= coeff;
	return r;
      }
    }
  }

  // common case: small coefficient, in-range exponent
  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);

  return r;
}
1468
1469
1470/*****************************************************************************
1471*
1472* BID128 pack/unpack macros
1473*
1474*****************************************************************************/
1475
//
// Macro for handling BID128 underflow
// sticky bit given as additional argument
//
// Like handle_UF_128, but the caller supplies one extra (already lost)
// sticky bit R; the coefficient is first multiplied by 10 so R can be
// OR-ed into the new lowest digit before rounding by 10^ed2.
__BID_INLINE__ UINT128 *
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
		   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    // directed rounding away from zero yields the smallest denormal
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10  (as CQ*2 + CQ*8, with 128-bit shifts)
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder (sticky bit)
  if (R)
    CQ.w[0] |= 1;

  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // swap directed rounding modes for negative operands
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // divide by 10^ed2 via multiplication by the 128-bit reciprocal,
  // then a right shift by recip_scale[ed2]
  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5
      // (round-half-even correction)

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    // exactness test per rounding mode (must undo the rounding constant)
    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);	// Tmp1 = 2^amount
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
1618
1619
//
// Macro for handling BID128 underflow
//
// Rounds the 128-bit coefficient CQ by 10^ed2 (ed2 = -expon) using a
// precomputed 128-bit reciprocal, applies the rounding mode from
// *prounding_mode, sets underflow/inexact flags, and stores the packed
// result (sign OR-ed into the high word) through pres.
__BID_INLINE__ UINT128 *
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
	       unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    // directed rounding away from zero yields the smallest denormal
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // swap directed rounding modes for negative operands
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // divide by 10^ed2 via multiplication by the 128-bit reciprocal,
  // then a right shift by recip_scale[ed2]
  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5
      // (round-half-even correction)

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    // exactness test per rounding mode (must undo the rounding constant)
    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);	// Tmp1 = 2^amount
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
1752
1753
1754
//
// BID128 unpack, input passed by value
//
// Splits a BID128 operand into sign, biased exponent and 128-bit
// coefficient.  Non-canonical coefficients are replaced by zero.
// Returns 0 for NaN/Infinity/non-canonical-special inputs, otherwise a
// non-zero-coefficient indicator (coeff.w[0] | coeff.w[1]).
__BID_INLINE__ UINT64
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
		     UINT128 * pcoefficient_x, UINT128 x) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input (steering bits set but not Inf/NaN):
      // coefficient is treated as zero
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (x.w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // Inf or NaN from here on
    // 10^33
    T33 = power10_table_128[33];

    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
    {
      // NaN payload too large: keep only sign/combination bits
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
      pcoefficient_x->w[0] = 0;
    } else
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
      // infinity: canonical form has a zero payload
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values (coefficient >= 10^34 reads as zero)
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
1818
1819
//
// BID128 unpack, input passed by reference
//
// Same contract as unpack_BID128_value, but reads the operand through
// px.  Note the NaN handling differs slightly: a too-large coefficient
// is cleared via ~LARGE_COEFF_MASK128 rather than re-masked from x.
__BID_INLINE__ UINT64
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
	       UINT128 * pcoefficient_x, UINT128 * px) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (px->w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input (steering bits set but not Inf/NaN):
      // coefficient is treated as zero
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (px->w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // Inf or NaN from here on
    // 10^33
    T33 = power10_table_128[33];
    coeff.w[0] = px->w[0];
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = px->w[0];
    pcoefficient_x->w[1] = px->w[1];
    if (__unsigned_compare_ge_128 (coeff, T33)) {	// non-canonical
      // payload too large: clear the coefficient field
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
      pcoefficient_x->w[0] = 0;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = px->w[0];
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values (coefficient >= 10^34 reads as zero)
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (px->w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
1872
//
// Pack macro checks for overflow, but not underflow
//
// If the exponent is out of range, first tries to normalize by scaling
// the 128-bit coefficient up by 10 per step; if the exponent is still
// too large, produces the rounding-mode-dependent overflow result.
__BID_INLINE__ UINT128 *
get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
			 UINT128 coeff, unsigned *prounding_mode,
			 unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      // multiply coefficient by 10 (as c*8 + c*2 across both words)
      // while decrementing the exponent
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)	// carry out of the low word
	  coeff.w[1]++;

	expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // modes that do not overflow to infinity yield the largest finite value
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP)
	  || (!sgn && *prounding_mode == ROUNDING_DOWN))
      {
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
	pres->w[1] = sgn | INFINITY_MASK64;
	pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
1934
1935
1936//
1937// No overflow/underflow checks
1938// No checking for coefficient == 10^34 (rounding artifact)
1939//
1940__BID_INLINE__ UINT128 *
1941get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
1942 UINT128 coeff) {
1943 UINT64 tmp;
1944
1945 pres->w[0] = coeff.w[0];
1946 tmp = expon;
1947 tmp <<= 49;
1948 pres->w[1] = sgn | tmp | coeff.w[1];
1949
1950 return pres;
1951}
1952
1953//
1954// No overflow/underflow checks
1955//
1956__BID_INLINE__ UINT128 *
1957get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
1958 UINT64 tmp;
1959
1960 // coeff==10^34?
1961 if (coeff.w[1] == 0x0001ed09bead87c0ull
1962 && coeff.w[0] == 0x378d8e6400000000ull) {
1963 expon++;
1964 // set coefficient to 10^33
1965 coeff.w[1] = 0x0000314dc6448d93ull;
1966 coeff.w[0] = 0x38c15b0a00000000ull;
1967 }
1968
1969 pres->w[0] = coeff.w[0];
1970 tmp = expon;
1971 tmp <<= 49;
1972 pres->w[1] = sgn | tmp | coeff.w[1];
1973
1974 return pres;
1975}
1976
//
// General BID128 pack macro
//
// Full packer: folds the 10^34 rounding artifact, delegates underflow
// to handle_UF_128, normalizes on overflow, maps an overflowed zero to
// the maximum exponent, and otherwise applies the rounding-mode
// dependent overflow result.
__BID_INLINE__ UINT128 *
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
	    unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  // coeff==10^34?  re-express as 10^33 with the exponent incremented
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0) {
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
			    fpsc);
    }

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      // multiply coefficient by 10 per step while decrementing exponent
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)	// carry out of the low word
	  coeff.w[1]++;

	expon--;
      }
    }
    if (expon > DECIMAL_MAX_EXPON_128) {
      // zero coefficient never overflows: clamp to the max exponent
      if (!(coeff.w[1] | coeff.w[0])) {
	pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
	pres->w[0] = 0;
	return pres;
      }
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // modes that do not overflow to infinity yield the largest finite value
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP)
	  || (!sgn && *prounding_mode == ROUNDING_DOWN))
      {
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
	pres->w[1] = sgn | INFINITY_MASK64;
	pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
2056
2057
//
// Macro used for conversions from string
// (no additional arguments given for rounding mode, status flags)
//
// Packs with rounding fixed to nearest (rmode = 0); status flags raised
// by handle_UF_128 are written into a local and discarded on purpose.
__BID_INLINE__ UINT128 *
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT128 D2, D8;
  UINT64 tmp;
  unsigned rmode = 0, status;	// status is intentionally write-only here

  // coeff==10^34?  re-express as 10^33 with the exponent incremented
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      // normalize: coeff *= 10 (as coeff*2 + coeff*8) while the
      // coefficient stays below 10^33 and the exponent is out of range
      while (expon > DECIMAL_MAX_EXPON_128 &&
	     (coeff.w[1] < power10_table_128[33].w[1] ||
	      (coeff.w[1] == power10_table_128[33].w[1]
	       && coeff.w[0] < power10_table_128[33].w[0]))) {
	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
	D2.w[0] = coeff.w[0] << 1;
	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
	D8.w[0] = coeff.w[0] << 3;

	__add_128_128 (coeff, D2, D8);
	expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      // zero never overflows: clamp the exponent
      expon = DECIMAL_MAX_EXPON_128;

    if (expon > DECIMAL_MAX_EXPON_128) {
      // overflow: default +/-Inf; rmode is always 0 here, so the
      // directed-rounding cases below are effectively dead code kept
      // for symmetry with the other pack macros
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn) {
	  pres->w[1] = LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      case ROUNDING_TO_ZERO:
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn) {
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      }

      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
2134
2135
2136
2137/*****************************************************************************
2138*
2139* BID32 pack/unpack macros
2140*
2141*****************************************************************************/
2142
2143
2144__BID_INLINE__ UINT32
2145unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
2146 UINT32 * pcoefficient_x, UINT32 x) {
2147 UINT32 tmp;
2148
2149 *psign_x = x & 0x80000000;
2150
2151 if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
2152 // special encodings
b2a00c89
L
2153 if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
2154 *pcoefficient_x = x & 0xfe0fffff;
2155 if ((x & 0x000fffff) >= 1000000)
2156 *pcoefficient_x = x & 0xfe000000;
2157 if ((x & NAN_MASK32) == INFINITY_MASK32)
2158 *pcoefficient_x = x & 0xf8000000;
2159 *pexponent_x = 0;
200359e8 2160 return 0; // NaN or Infinity
b2a00c89 2161 }
200359e8
L
2162 // coefficient
2163 *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
2164 // check for non-canonical value
2165 if (*pcoefficient_x >= 10000000)
2166 *pcoefficient_x = 0;
2167 // get exponent
2168 tmp = x >> 21;
2169 *pexponent_x = tmp & EXPONENT_MASK32;
2170 return 1;
2171 }
2172 // exponent
2173 tmp = x >> 23;
2174 *pexponent_x = tmp & EXPONENT_MASK32;
2175 // coefficient
2176 *pcoefficient_x = (x & LARGE_COEFF_MASK32);
2177
2178 return *pcoefficient_x;
2179}
2180
2181//
2182// General pack macro for BID32
2183//
2184__BID_INLINE__ UINT32
2185get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
2186 unsigned *fpsc) {
2187 UINT128 Q;
2188 UINT64 C64, remainder_h, carry, Stemp;
2189 UINT32 r, mask;
2190 int extra_digits, amount, amount2;
2191 unsigned status;
2192
2193 if (coeff > 9999999ull) {
2194 expon++;
2195 coeff = 1000000ull;
2196 }
2197 // check for possible underflow/overflow
2198 if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2199 if (expon < 0) {
2200 // underflow
2201 if (expon + MAX_FORMAT_DIGITS_32 < 0) {
2202#ifdef SET_STATUS_FLAGS
2203 __set_status_flags (fpsc,
2204 UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2205#endif
2206#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2207#ifndef IEEE_ROUND_NEAREST
2208 if (rmode == ROUNDING_DOWN && sgn)
2209 return 0x80000001;
2210 if (rmode == ROUNDING_UP && !sgn)
2211 return 1;
2212#endif
2213#endif
2214 // result is 0
2215 return sgn;
2216 }
2217 // get digits to be shifted out
2218#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
2219 rmode = 0;
2220#endif
2221#ifdef IEEE_ROUND_NEAREST
2222 rmode = 0;
b2a00c89
L
2223#endif
2224#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2225#ifndef IEEE_ROUND_NEAREST
2226 if (sgn && (unsigned) (rmode - 1) < 2)
2227 rmode = 3 - rmode;
2228#endif
200359e8
L
2229#endif
2230
2231 extra_digits = -expon;
b2a00c89 2232 coeff += round_const_table[rmode][extra_digits];
200359e8
L
2233
2234 // get coeff*(2^M[extra_digits])/10^extra_digits
b2a00c89 2235 __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);
200359e8
L
2236
2237 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
b2a00c89 2238 amount = short_recip_scale[extra_digits];
200359e8
L
2239
2240 C64 = Q.w[1] >> amount;
2241
2242#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2243#ifndef IEEE_ROUND_NEAREST
b2a00c89 2244 if (rmode == 0) //ROUNDING_TO_NEAREST
200359e8
L
2245#endif
2246 if (C64 & 1) {
2247 // check whether fractional part of initial_P/10^extra_digits is exactly .5
2248
2249 // get remainder
2250 amount2 = 64 - amount;
2251 remainder_h = 0;
2252 remainder_h--;
2253 remainder_h >>= amount2;
2254 remainder_h = remainder_h & Q.w[1];
2255
b2a00c89 2256 if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
200359e8
L
2257 C64--;
2258 }
2259 }
2260#endif
2261
2262#ifdef SET_STATUS_FLAGS
2263
2264 if (is_inexact (fpsc))
2265 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
2266 else {
2267 status = INEXACT_EXCEPTION;
2268 // get remainder
2269 remainder_h = Q.w[1] << (64 - amount);
2270
2271 switch (rmode) {
2272 case ROUNDING_TO_NEAREST:
2273 case ROUNDING_TIES_AWAY:
2274 // test whether fractional part is 0
2275 if (remainder_h == 0x8000000000000000ull
b2a00c89 2276 && (Q.w[0] < reciprocals10_64[extra_digits]))
200359e8
L
2277 status = EXACT_STATUS;
2278 break;
2279 case ROUNDING_DOWN:
2280 case ROUNDING_TO_ZERO:
b2a00c89 2281 if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
200359e8
L
2282 status = EXACT_STATUS;
2283 break;
2284 default:
2285 // round up
2286 __add_carry_out (Stemp, carry, Q.w[0],
b2a00c89 2287 reciprocals10_64[extra_digits]);
200359e8
L
2288 if ((remainder_h >> (64 - amount)) + carry >=
2289 (((UINT64) 1) << amount))
2290 status = EXACT_STATUS;
2291 }
2292
2293 if (status != EXACT_STATUS)
2294 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
2295 }
2296
2297#endif
2298
2299 return sgn | (UINT32) C64;
2300 }
2301
2302 while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
2303 coeff = (coeff << 3) + (coeff << 1);
2304 expon--;
2305 }
2306 if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2307 __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2308 // overflow
2309 r = sgn | INFINITY_MASK32;
2310 switch (rmode) {
2311 case ROUNDING_DOWN:
2312 if (!sgn)
2313 r = LARGEST_BID32;
2314 break;
2315 case ROUNDING_TO_ZERO:
2316 r = sgn | LARGEST_BID32;
2317 break;
2318 case ROUNDING_UP:
2319 // round up
2320 if (sgn)
2321 r = sgn | LARGEST_BID32;
2322 }
2323 return r;
2324 }
2325 }
2326
2327 mask = 1 << 23;
2328
2329 // check whether coefficient fits in DECIMAL_COEFF_FIT bits
2330 if (coeff < mask) {
2331 r = expon;
2332 r <<= 23;
2333 r |= ((UINT32) coeff | sgn);
2334 return r;
2335 }
2336 // special format
2337
2338 r = expon;
2339 r <<= 21;
2340 r |= (sgn | SPECIAL_ENCODING_MASK32);
2341 // add coeff, without leading bits
2342 mask = (1 << 21) - 1;
2343 r |= (((UINT32) coeff) & mask);
2344
2345 return r;
2346}
2347
2348
2349
2350//
2351// no overflow/underflow checks
2352//
2353__BID_INLINE__ UINT32
2354very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
2355 UINT32 r, mask;
2356
2357 mask = 1 << 23;
2358
2359 // check whether coefficient fits in 10*2+3 bits
2360 if (coeff < mask) {
2361 r = expon;
2362 r <<= 23;
2363 r |= (coeff | sgn);
2364 return r;
2365 }
2366 // special format
2367 r = expon;
2368 r <<= 21;
2369 r |= (sgn | SPECIAL_ENCODING_MASK32);
2370 // add coeff, without leading bits
2371 mask = (1 << 21) - 1;
2372 coeff &= mask;
2373 r |= coeff;
2374
2375 return r;
2376}
2377
2378
2379
2380/*************************************************************
2381 *
2382 *************************************************************/
2383typedef
2384ALIGN (16)
2385 struct {
2386 UINT64 w[6];
2387 } UINT384;
2388 typedef ALIGN (16)
2389 struct {
2390 UINT64 w[8];
2391 } UINT512;
2392
2393// #define P 34
2394#define MASK_STEERING_BITS 0x6000000000000000ull
2395#define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
2396#define MASK_BINARY_SIG1 0x001fffffffffffffull
2397#define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
2398 //used to take G[2:w+3] (sec 3.3)
2399#define MASK_BINARY_SIG2 0x0007ffffffffffffull
2400 //used to mask out G4:T0 (sec 3.3)
2401#define MASK_BINARY_OR2 0x0020000000000000ull
2402 //used to prefix 8+G4 to T (sec 3.3)
2403#define UPPER_EXPON_LIMIT 51
2404#define MASK_EXP 0x7ffe000000000000ull
2405#define MASK_SPECIAL 0x7800000000000000ull
2406#define MASK_NAN 0x7c00000000000000ull
2407#define MASK_SNAN 0x7e00000000000000ull
2408#define MASK_ANY_INF 0x7c00000000000000ull
2409#define MASK_INF 0x7800000000000000ull
2410#define MASK_SIGN 0x8000000000000000ull
2411#define MASK_COEFF 0x0001ffffffffffffull
2412#define BIN_EXP_BIAS (0x1820ull << 49)
2413
2414#define EXP_MIN 0x0000000000000000ull
2415 // EXP_MIN = (-6176 + 6176) << 49
2416#define EXP_MAX 0x5ffe000000000000ull
2417 // EXP_MAX = (6111 + 6176) << 49
2418#define EXP_MAX_P1 0x6000000000000000ull
2419 // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
2420#define EXP_P1 0x0002000000000000ull
2421 // EXP_ P1= 1 << 49
2422#define expmin -6176
2423 // min unbiased exponent
2424#define expmax 6111
2425 // max unbiased exponent
2426#define expmin16 -398
2427 // min unbiased exponent
2428#define expmax16 369
2429 // max unbiased exponent
2430
2431#define SIGNMASK32 0x80000000
2432#define BID64_SIG_MAX 0x002386F26FC0ffffull
2433#define SIGNMASK64 0x8000000000000000ull
2434
2435// typedef unsigned int FPSC; // floating-point status and control
2436 // bit31:
2437 // bit30:
2438 // bit29:
2439 // bit28:
2440 // bit27:
2441 // bit26:
2442 // bit25:
2443 // bit24:
2444 // bit23:
2445 // bit22:
2446 // bit21:
2447 // bit20:
2448 // bit19:
2449 // bit18:
2450 // bit17:
2451 // bit16:
2452 // bit15:
2453 // bit14: RC:2
2454 // bit13: RC:1
2455 // bit12: RC:0
2456 // bit11: PM
2457 // bit10: UM
2458 // bit9: OM
2459 // bit8: ZM
2460 // bit7: DM
2461 // bit6: IM
2462 // bit5: PE
2463 // bit4: UE
2464 // bit3: OE
2465 // bit2: ZE
2466 // bit1: DE
2467 // bit0: IE
2468
2469#define ROUNDING_MODE_MASK 0x00007000
2470
2471 typedef struct _DEC_DIGITS {
2472 unsigned int digits;
2473 UINT64 threshold_hi;
2474 UINT64 threshold_lo;
2475 unsigned int digits1;
2476 } DEC_DIGITS;
2477
// Lookup tables defined elsewhere in libbid: digit-count thresholds,
// powers of ten (ten2k*), scaled reciprocals of powers of ten
// (ten2mk*) with their shift/mask companions, and character tables —
// presumably for decimal-string conversion; see the table definitions
// for exact contents.
 extern DEC_DIGITS nr_digits[];
 extern UINT64 midpoint64[];
 extern UINT128 midpoint128[];
 extern UINT192 midpoint192[];
 extern UINT256 midpoint256[];
 extern UINT64 ten2k64[];
 extern UINT128 ten2k128[];
 extern UINT256 ten2k256[];
 extern UINT128 ten2mk128[];
 extern UINT64 ten2mk64[];
 extern UINT128 ten2mk128trunc[];
 extern int shiftright128[];
 extern UINT64 maskhigh128[];
 extern UINT64 maskhigh128M[];
 extern UINT64 maskhigh192M[];
 extern UINT64 maskhigh256M[];
 extern UINT64 onehalf128[];
 extern UINT64 onehalf128M[];
 extern UINT64 onehalf192M[];
 extern UINT64 onehalf256M[];
 extern UINT128 ten2mk128M[];
 extern UINT128 ten2mk128truncM[];
 extern UINT192 ten2mk192truncM[];
 extern UINT256 ten2mk256truncM[];
 extern int shiftright128M[];
 extern int shiftright192M[];
 extern int shiftright256M[];
 extern UINT192 ten2mk192M[];
 extern UINT256 ten2mk256M[];
 extern unsigned char char_table2[];
 extern unsigned char char_table3[];

 extern UINT64 ten2m3k64[];
 extern unsigned int shift_ten2m3k64[];
 extern UINT128 ten2m3k128[];
 extern unsigned int shift_ten2m3k128[];



/***************************************************************************
 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
 ***************************************************************************/

 extern UINT64 Kx64[];
 extern unsigned int Ex64m64[];
 extern UINT64 half64[];
 extern UINT64 mask64[];
 extern UINT64 ten2mxtrunc64[];

 extern UINT128 Kx128[];
 extern unsigned int Ex128m128[];
 extern UINT64 half128[];
 extern UINT64 mask128[];
 extern UINT128 ten2mxtrunc128[];

 extern UINT192 Kx192[];
 extern unsigned int Ex192m192[];
 extern UINT64 half192[];
 extern UINT64 mask192[];
 extern UINT192 ten2mxtrunc192[];

 extern UINT256 Kx256[];
 extern unsigned int Ex256m256[];
 extern UINT64 half256[];
 extern UINT64 mask256[];
 extern UINT256 ten2mxtrunc256[];

// Carrier for either a BID64 or a BID128 operand/result, and the
// internal fused multiply-add entry point that takes three such
// operands with their precision tags P0..P3.
 typedef union __bid64_128 {
 UINT64 b64;
 UINT128 b128;
 } BID64_128;

 BID64_128 bid_fma (unsigned int P0,
 BID64_128 x1, unsigned int P1,
 BID64_128 y1, unsigned int P2,
 BID64_128 z1, unsigned int P3,
 unsigned int rnd_mode, FPSC * fpsc);
2555
// Decimal precision (coefficient digit counts) of BID64 and BID128.
#define P16 16
#define P34 34

// Bit-pattern access to a binary double (type punning via union).
 union __int_double {
 UINT64 i;
 double d;
 };
 typedef union __int_double int_double;


// Bit-pattern access to a binary float (type punning via union).
 union __int_float {
 UINT32 i;
 float d;
 };
 typedef union __int_float int_float;
2571
// Exchange the values of A and B, using T as caller-provided scratch.
// Wrapped in do/while(0) so the expansion is a single statement and
// "if (c) SWAP(a,b,t); else ..." parses correctly (the previous
// brace-only form made the trailing semicolon an empty statement,
// breaking unbraced if/else bodies).
#define SWAP(A,B,T) do { \
 T = A; \
 A = B; \
 B = T; \
} while (0)
2577
// this macro will find coefficient_x to be in [2^A, 2^(A+1) )
// ie it knows that it is A bits long
// Works by converting to float and reading the biased binary exponent
// from the bit pattern; 'tempx' must be an int_float union lvalue.
// NOTE(review): exact only while (float)coefficient_x does not round
// upward across a power of two (guaranteed for values < 2^24) —
// confirm against callers.
// Fixed: the body previously wrote to 'temp_x.d' — a name captured
// from the caller's scope — instead of the 'tempx' parameter, so the
// macro only worked when the argument was literally named 'temp_x';
// also wrapped in do/while(0) to form a single statement.
#define NUMBITS(A, coefficient_x, tempx) do {\
 (tempx).d = (float) (coefficient_x);\
 (A) = (((tempx).i >> 23) & EXPONENT_MASK32) - 0x7f;\
} while (0)
2584
// Result values for the IEEE 754-2008 class operation, from signaling
// NaN through positive infinity.
// NOTE(review): enumerator order/values appear to be relied upon by
// the class functions — confirm before reordering.
 enum class_types {
 signalingNaN,
 quietNaN,
 negativeInfinity,
 negativeNormal,
 negativeSubnormal,
 negativeZero,
 positiveZero,
 positiveSubnormal,
 positiveNormal,
 positiveInfinity
 };
2597
// Bit-pattern access to a binary double (type punning via union).
 typedef union {
 UINT64 ui64;
 double d;
 } BID_UI64DOUBLE;

#endif