]> git.ipfire.org Git - thirdparty/qemu.git/blame - fpu/softfloat.c
Merge remote-tracking branch 'remotes/rth/tags/pull-fpu-20181005' into staging
[thirdparty/qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
6fff2167 86#include "qemu/bitops.h"
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
88857aca 96#include "fpu/softfloat-macros.h"
158142c2 97
bb4d4bb3
PM
98/*----------------------------------------------------------------------------
99| Returns the fraction bits of the half-precision floating-point value `a'.
100*----------------------------------------------------------------------------*/
101
a49db98d 102static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
103{
104 return float16_val(a) & 0x3ff;
105}
106
107/*----------------------------------------------------------------------------
108| Returns the exponent bits of the half-precision floating-point value `a'.
109*----------------------------------------------------------------------------*/
110
0c48262d 111static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
112{
113 return (float16_val(a) >> 10) & 0x1f;
114}
115
d97544c9
AB
116/*----------------------------------------------------------------------------
117| Returns the fraction bits of the single-precision floating-point value `a'.
118*----------------------------------------------------------------------------*/
119
120static inline uint32_t extractFloat32Frac(float32 a)
121{
122 return float32_val(a) & 0x007FFFFF;
123}
124
125/*----------------------------------------------------------------------------
126| Returns the exponent bits of the single-precision floating-point value `a'.
127*----------------------------------------------------------------------------*/
128
129static inline int extractFloat32Exp(float32 a)
130{
131 return (float32_val(a) >> 23) & 0xFF;
132}
133
134/*----------------------------------------------------------------------------
135| Returns the sign bit of the single-precision floating-point value `a'.
136*----------------------------------------------------------------------------*/
137
138static inline flag extractFloat32Sign(float32 a)
139{
140 return float32_val(a) >> 31;
141}
142
143/*----------------------------------------------------------------------------
144| Returns the fraction bits of the double-precision floating-point value `a'.
145*----------------------------------------------------------------------------*/
146
147static inline uint64_t extractFloat64Frac(float64 a)
148{
149 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
150}
151
152/*----------------------------------------------------------------------------
153| Returns the exponent bits of the double-precision floating-point value `a'.
154*----------------------------------------------------------------------------*/
155
156static inline int extractFloat64Exp(float64 a)
157{
158 return (float64_val(a) >> 52) & 0x7FF;
159}
160
161/*----------------------------------------------------------------------------
162| Returns the sign bit of the double-precision floating-point value `a'.
163*----------------------------------------------------------------------------*/
164
165static inline flag extractFloat64Sign(float64 a)
166{
167 return float64_val(a) >> 63;
168}
169
a90119b5
AB
/*
 * Classification of a decomposed floating point number.
 *
 * The enumerator order matters: the two NaN kinds are last, so a
 * single comparison "cls >= float_class_qnan" identifies any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,   /* raw parts, not yet canonicalized */
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,   /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
183
247d1f21
RH
184/* Simple helpers for checking if, or what kind of, NaN we have */
185static inline __attribute__((unused)) bool is_nan(FloatClass c)
186{
187 return unlikely(c >= float_class_qnan);
188}
189
190static inline __attribute__((unused)) bool is_snan(FloatClass c)
191{
192 return c == float_class_snan;
193}
194
195static inline __attribute__((unused)) bool is_qnan(FloatClass c)
196{
197 return c == float_class_qnan;
198}
199
a90119b5
AB
200/*
201 * Structure holding all of the decomposed parts of a float. The
202 * exponent is unbiased and the fraction is normalized. All
203 * calculations are done with a 64 bit fraction and then rounded as
204 * appropriate for the final format.
205 *
206 * Thanks to the packed FloatClass a decent compiler should be able to
207 * fit the whole structure into registers and avoid using the stack
208 * for parameter passing.
209 */
210
211typedef struct {
212 uint64_t frac;
213 int32_t exp;
214 FloatClass cls;
215 bool sign;
216} FloatParts;
217
/* Bit position of the binary point within the 64-bit working fraction. */
#define DECOMPOSED_BINARY_POINT 62
/* The leading (implicit) one of a normalized fraction. */
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
/* The bit above the implicit one; set when a fraction has carried out. */
#define DECOMPOSED_OVERFLOW_BIT (1ull << (DECOMPOSED_BINARY_POINT + 1))
221
/*
 * Description of one floating point format.
 *
 * Basic layout:
 *   exp_size:   width of the exponent field in bits
 *   exp_bias:   offset applied to the exponent field
 *   exp_max:    the maximum normalised exponent
 *   frac_size:  width of the fraction field in bits
 *   frac_shift: shift that aligns the fraction with DECOMPOSED_BINARY_POINT
 * Derived from the size of the fraction:
 *   frac_lsb:   least significant bit of the fraction
 *   frac_lsbm1: the bit just below frac_lsb (used for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * Optional modifiers:
 *   arm_althp:  handle ARM Alternative Half Precision
 */
typedef struct {
    /* field geometry */
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    /* rounding helpers, expressed in the decomposed fraction layout */
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    /* format quirks */
    bool arm_althp;
} FloatFmt;
247
/*
 * Expand to the designated initializers for every FloatFmt field that
 * can be derived from the exponent width E and fraction width F (both
 * in bits).  arm_althp is not set here.
 *
 * E and F are parenthesized at every use so that expression arguments
 * (e.g. FLOAT_PARAMS(5, 8 + 2)) expand with the intended precedence.
 * Each argument is evaluated more than once, so pass constants only.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
259
260static const FloatFmt float16_params = {
261 FLOAT_PARAMS(5, 10)
262};
263
6fed16b2
AB
264static const FloatFmt float16_params_ahp = {
265 FLOAT_PARAMS(5, 10),
266 .arm_althp = true
267};
268
a90119b5
AB
269static const FloatFmt float32_params = {
270 FLOAT_PARAMS(8, 23)
271};
272
273static const FloatFmt float64_params = {
274 FLOAT_PARAMS(11, 52)
275};
276
6fff2167
AB
277/* Unpack a float to parts, but do not canonicalize. */
278static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
279{
280 const int sign_pos = fmt.frac_size + fmt.exp_size;
281
282 return (FloatParts) {
283 .cls = float_class_unclassified,
284 .sign = extract64(raw, sign_pos, 1),
285 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
286 .frac = extract64(raw, 0, fmt.frac_size),
287 };
288}
289
290static inline FloatParts float16_unpack_raw(float16 f)
291{
292 return unpack_raw(float16_params, f);
293}
294
295static inline FloatParts float32_unpack_raw(float32 f)
296{
297 return unpack_raw(float32_params, f);
298}
299
300static inline FloatParts float64_unpack_raw(float64 f)
301{
302 return unpack_raw(float64_params, f);
303}
304
305/* Pack a float from parts, but do not canonicalize. */
306static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
307{
308 const int sign_pos = fmt.frac_size + fmt.exp_size;
309 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
310 return deposit64(ret, sign_pos, 1, p.sign);
311}
312
313static inline float16 float16_pack_raw(FloatParts p)
314{
315 return make_float16(pack_raw(float16_params, p));
316}
317
318static inline float32 float32_pack_raw(FloatParts p)
319{
320 return make_float32(pack_raw(float32_params, p));
321}
322
323static inline float64 float64_pack_raw(FloatParts p)
324{
325 return make_float64(pack_raw(float64_params, p));
326}
327
0664335a
RH
328/*----------------------------------------------------------------------------
329| Functions and definitions to determine: (1) whether tininess for underflow
330| is detected before or after rounding by default, (2) what (if anything)
331| happens when exceptions are raised, (3) how signaling NaNs are distinguished
332| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
333| are propagated from function inputs to output. These details are target-
334| specific.
335*----------------------------------------------------------------------------*/
336#include "softfloat-specialize.h"
337
6fff2167
AB
338/* Canonicalize EXP and FRAC, setting CLS. */
339static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
340 float_status *status)
341{
ca3a3d5a 342 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
343 if (part.frac == 0) {
344 part.cls = float_class_inf;
345 } else {
94933df0 346 part.frac <<= parm->frac_shift;
298b468e
RH
347 part.cls = (parts_is_snan_frac(part.frac, status)
348 ? float_class_snan : float_class_qnan);
6fff2167
AB
349 }
350 } else if (part.exp == 0) {
351 if (likely(part.frac == 0)) {
352 part.cls = float_class_zero;
353 } else if (status->flush_inputs_to_zero) {
354 float_raise(float_flag_input_denormal, status);
355 part.cls = float_class_zero;
356 part.frac = 0;
357 } else {
358 int shift = clz64(part.frac) - 1;
359 part.cls = float_class_normal;
360 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
361 part.frac <<= shift;
362 }
363 } else {
364 part.cls = float_class_normal;
365 part.exp -= parm->exp_bias;
366 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
367 }
368 return part;
369}
370
371/* Round and uncanonicalize a floating-point number by parts. There
372 * are FRAC_SHIFT bits that may require rounding at the bottom of the
373 * fraction; these bits will be removed. The exponent will be biased
374 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
375 */
376
377static FloatParts round_canonical(FloatParts p, float_status *s,
378 const FloatFmt *parm)
379{
380 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
381 const uint64_t round_mask = parm->round_mask;
382 const uint64_t roundeven_mask = parm->roundeven_mask;
383 const int exp_max = parm->exp_max;
384 const int frac_shift = parm->frac_shift;
385 uint64_t frac, inc;
386 int exp, flags = 0;
387 bool overflow_norm;
388
389 frac = p.frac;
390 exp = p.exp;
391
392 switch (p.cls) {
393 case float_class_normal:
394 switch (s->float_rounding_mode) {
395 case float_round_nearest_even:
396 overflow_norm = false;
397 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
398 break;
399 case float_round_ties_away:
400 overflow_norm = false;
401 inc = frac_lsbm1;
402 break;
403 case float_round_to_zero:
404 overflow_norm = true;
405 inc = 0;
406 break;
407 case float_round_up:
408 inc = p.sign ? 0 : round_mask;
409 overflow_norm = p.sign;
410 break;
411 case float_round_down:
412 inc = p.sign ? round_mask : 0;
413 overflow_norm = !p.sign;
414 break;
415 default:
416 g_assert_not_reached();
417 }
418
419 exp += parm->exp_bias;
420 if (likely(exp > 0)) {
421 if (frac & round_mask) {
422 flags |= float_flag_inexact;
423 frac += inc;
424 if (frac & DECOMPOSED_OVERFLOW_BIT) {
425 frac >>= 1;
426 exp++;
427 }
428 }
429 frac >>= frac_shift;
430
ca3a3d5a
AB
431 if (parm->arm_althp) {
432 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
433 if (unlikely(exp > exp_max)) {
434 /* Overflow. Return the maximum normal. */
435 flags = float_flag_invalid;
436 exp = exp_max;
437 frac = -1;
438 }
439 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
440 flags |= float_flag_overflow | float_flag_inexact;
441 if (overflow_norm) {
442 exp = exp_max - 1;
443 frac = -1;
444 } else {
445 p.cls = float_class_inf;
446 goto do_inf;
447 }
448 }
449 } else if (s->flush_to_zero) {
450 flags |= float_flag_output_denormal;
451 p.cls = float_class_zero;
452 goto do_zero;
453 } else {
454 bool is_tiny = (s->float_detect_tininess
455 == float_tininess_before_rounding)
456 || (exp < 0)
457 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
458
459 shift64RightJamming(frac, 1 - exp, &frac);
460 if (frac & round_mask) {
461 /* Need to recompute round-to-even. */
462 if (s->float_rounding_mode == float_round_nearest_even) {
463 inc = ((frac & roundeven_mask) != frac_lsbm1
464 ? frac_lsbm1 : 0);
465 }
466 flags |= float_flag_inexact;
467 frac += inc;
468 }
469
470 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
471 frac >>= frac_shift;
472
473 if (is_tiny && (flags & float_flag_inexact)) {
474 flags |= float_flag_underflow;
475 }
476 if (exp == 0 && frac == 0) {
477 p.cls = float_class_zero;
478 }
479 }
480 break;
481
482 case float_class_zero:
483 do_zero:
484 exp = 0;
485 frac = 0;
486 break;
487
488 case float_class_inf:
489 do_inf:
ca3a3d5a 490 assert(!parm->arm_althp);
6fff2167
AB
491 exp = exp_max;
492 frac = 0;
493 break;
494
495 case float_class_qnan:
496 case float_class_snan:
ca3a3d5a 497 assert(!parm->arm_althp);
6fff2167 498 exp = exp_max;
94933df0 499 frac >>= parm->frac_shift;
6fff2167
AB
500 break;
501
502 default:
503 g_assert_not_reached();
504 }
505
506 float_raise(flags, s);
507 p.exp = exp;
508 p.frac = frac;
509 return p;
510}
511
6fed16b2
AB
512/* Explicit FloatFmt version */
513static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
514 const FloatFmt *params)
515{
516 return canonicalize(float16_unpack_raw(f), params, s);
517}
518
6fff2167
AB
519static FloatParts float16_unpack_canonical(float16 f, float_status *s)
520{
6fed16b2
AB
521 return float16a_unpack_canonical(f, s, &float16_params);
522}
523
524static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
525 const FloatFmt *params)
526{
527 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
528}
529
530static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
531{
6fed16b2 532 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
533}
534
535static FloatParts float32_unpack_canonical(float32 f, float_status *s)
536{
537 return canonicalize(float32_unpack_raw(f), &float32_params, s);
538}
539
540static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
541{
0bcfbcbe 542 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
543}
544
545static FloatParts float64_unpack_canonical(float64 f, float_status *s)
546{
547 return canonicalize(float64_unpack_raw(f), &float64_params, s);
548}
549
550static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
551{
0bcfbcbe 552 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
553}
554
dbe4d53a
AB
555static FloatParts return_nan(FloatParts a, float_status *s)
556{
557 switch (a.cls) {
558 case float_class_snan:
559 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 560 a = parts_silence_nan(a, s);
dbe4d53a
AB
561 /* fall through */
562 case float_class_qnan:
563 if (s->default_nan_mode) {
f7e598e2 564 return parts_default_nan(s);
dbe4d53a
AB
565 }
566 break;
567
568 default:
569 g_assert_not_reached();
570 }
571 return a;
572}
573
6fff2167
AB
574static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
575{
576 if (is_snan(a.cls) || is_snan(b.cls)) {
577 s->float_exception_flags |= float_flag_invalid;
578 }
579
580 if (s->default_nan_mode) {
f7e598e2 581 return parts_default_nan(s);
6fff2167 582 } else {
4f251cfd 583 if (pickNaN(a.cls, b.cls,
6fff2167
AB
584 a.frac > b.frac ||
585 (a.frac == b.frac && a.sign < b.sign))) {
586 a = b;
587 }
0bcfbcbe
RH
588 if (is_snan(a.cls)) {
589 return parts_silence_nan(a, s);
590 }
6fff2167
AB
591 }
592 return a;
593}
594
d446830a
AB
595static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
596 bool inf_zero, float_status *s)
597{
1839189b
PM
598 int which;
599
d446830a
AB
600 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
601 s->float_exception_flags |= float_flag_invalid;
602 }
603
3bd2dec1 604 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 605
d446830a 606 if (s->default_nan_mode) {
1839189b
PM
607 /* Note that this check is after pickNaNMulAdd so that function
608 * has an opportunity to set the Invalid flag.
609 */
f7e598e2 610 which = 3;
1839189b 611 }
d446830a 612
1839189b
PM
613 switch (which) {
614 case 0:
615 break;
616 case 1:
617 a = b;
618 break;
619 case 2:
620 a = c;
621 break;
622 case 3:
f7e598e2 623 return parts_default_nan(s);
1839189b
PM
624 default:
625 g_assert_not_reached();
d446830a 626 }
1839189b 627
0bcfbcbe
RH
628 if (is_snan(a.cls)) {
629 return parts_silence_nan(a, s);
630 }
d446830a
AB
631 return a;
632}
633
6fff2167
AB
634/*
635 * Returns the result of adding or subtracting the values of the
636 * floating-point values `a' and `b'. The operation is performed
637 * according to the IEC/IEEE Standard for Binary Floating-Point
638 * Arithmetic.
639 */
640
641static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
642 float_status *s)
643{
644 bool a_sign = a.sign;
645 bool b_sign = b.sign ^ subtract;
646
647 if (a_sign != b_sign) {
648 /* Subtraction */
649
650 if (a.cls == float_class_normal && b.cls == float_class_normal) {
651 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
652 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
653 a.frac = a.frac - b.frac;
654 } else {
655 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
656 a.frac = b.frac - a.frac;
657 a.exp = b.exp;
658 a_sign ^= 1;
659 }
660
661 if (a.frac == 0) {
662 a.cls = float_class_zero;
663 a.sign = s->float_rounding_mode == float_round_down;
664 } else {
665 int shift = clz64(a.frac) - 1;
666 a.frac = a.frac << shift;
667 a.exp = a.exp - shift;
668 a.sign = a_sign;
669 }
670 return a;
671 }
672 if (is_nan(a.cls) || is_nan(b.cls)) {
673 return pick_nan(a, b, s);
674 }
675 if (a.cls == float_class_inf) {
676 if (b.cls == float_class_inf) {
677 float_raise(float_flag_invalid, s);
f7e598e2 678 return parts_default_nan(s);
6fff2167
AB
679 }
680 return a;
681 }
682 if (a.cls == float_class_zero && b.cls == float_class_zero) {
683 a.sign = s->float_rounding_mode == float_round_down;
684 return a;
685 }
686 if (a.cls == float_class_zero || b.cls == float_class_inf) {
687 b.sign = a_sign ^ 1;
688 return b;
689 }
690 if (b.cls == float_class_zero) {
691 return a;
692 }
693 } else {
694 /* Addition */
695 if (a.cls == float_class_normal && b.cls == float_class_normal) {
696 if (a.exp > b.exp) {
697 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
698 } else if (a.exp < b.exp) {
699 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
700 a.exp = b.exp;
701 }
702 a.frac += b.frac;
703 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 704 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
705 a.exp += 1;
706 }
707 return a;
708 }
709 if (is_nan(a.cls) || is_nan(b.cls)) {
710 return pick_nan(a, b, s);
711 }
712 if (a.cls == float_class_inf || b.cls == float_class_zero) {
713 return a;
714 }
715 if (b.cls == float_class_inf || a.cls == float_class_zero) {
716 b.sign = b_sign;
717 return b;
718 }
719 }
720 g_assert_not_reached();
721}
722
723/*
724 * Returns the result of adding or subtracting the floating-point
725 * values `a' and `b'. The operation is performed according to the
726 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
727 */
728
729float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
730 float_status *status)
731{
732 FloatParts pa = float16_unpack_canonical(a, status);
733 FloatParts pb = float16_unpack_canonical(b, status);
734 FloatParts pr = addsub_floats(pa, pb, false, status);
735
736 return float16_round_pack_canonical(pr, status);
737}
738
739float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
740 float_status *status)
741{
742 FloatParts pa = float32_unpack_canonical(a, status);
743 FloatParts pb = float32_unpack_canonical(b, status);
744 FloatParts pr = addsub_floats(pa, pb, false, status);
745
746 return float32_round_pack_canonical(pr, status);
747}
748
749float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
750 float_status *status)
751{
752 FloatParts pa = float64_unpack_canonical(a, status);
753 FloatParts pb = float64_unpack_canonical(b, status);
754 FloatParts pr = addsub_floats(pa, pb, false, status);
755
756 return float64_round_pack_canonical(pr, status);
757}
758
759float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
760 float_status *status)
761{
762 FloatParts pa = float16_unpack_canonical(a, status);
763 FloatParts pb = float16_unpack_canonical(b, status);
764 FloatParts pr = addsub_floats(pa, pb, true, status);
765
766 return float16_round_pack_canonical(pr, status);
767}
768
769float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
770 float_status *status)
771{
772 FloatParts pa = float32_unpack_canonical(a, status);
773 FloatParts pb = float32_unpack_canonical(b, status);
774 FloatParts pr = addsub_floats(pa, pb, true, status);
775
776 return float32_round_pack_canonical(pr, status);
777}
778
779float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
780 float_status *status)
781{
782 FloatParts pa = float64_unpack_canonical(a, status);
783 FloatParts pb = float64_unpack_canonical(b, status);
784 FloatParts pr = addsub_floats(pa, pb, true, status);
785
786 return float64_round_pack_canonical(pr, status);
787}
788
74d707e2
AB
789/*
790 * Returns the result of multiplying the floating-point values `a' and
791 * `b'. The operation is performed according to the IEC/IEEE Standard
792 * for Binary Floating-Point Arithmetic.
793 */
794
795static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
796{
797 bool sign = a.sign ^ b.sign;
798
799 if (a.cls == float_class_normal && b.cls == float_class_normal) {
800 uint64_t hi, lo;
801 int exp = a.exp + b.exp;
802
803 mul64To128(a.frac, b.frac, &hi, &lo);
804 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
805 if (lo & DECOMPOSED_OVERFLOW_BIT) {
806 shift64RightJamming(lo, 1, &lo);
807 exp += 1;
808 }
809
810 /* Re-use a */
811 a.exp = exp;
812 a.sign = sign;
813 a.frac = lo;
814 return a;
815 }
816 /* handle all the NaN cases */
817 if (is_nan(a.cls) || is_nan(b.cls)) {
818 return pick_nan(a, b, s);
819 }
820 /* Inf * Zero == NaN */
821 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
822 (a.cls == float_class_zero && b.cls == float_class_inf)) {
823 s->float_exception_flags |= float_flag_invalid;
f7e598e2 824 return parts_default_nan(s);
74d707e2
AB
825 }
826 /* Multiply by 0 or Inf */
827 if (a.cls == float_class_inf || a.cls == float_class_zero) {
828 a.sign = sign;
829 return a;
830 }
831 if (b.cls == float_class_inf || b.cls == float_class_zero) {
832 b.sign = sign;
833 return b;
834 }
835 g_assert_not_reached();
836}
837
838float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
839 float_status *status)
840{
841 FloatParts pa = float16_unpack_canonical(a, status);
842 FloatParts pb = float16_unpack_canonical(b, status);
843 FloatParts pr = mul_floats(pa, pb, status);
844
845 return float16_round_pack_canonical(pr, status);
846}
847
848float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
849 float_status *status)
850{
851 FloatParts pa = float32_unpack_canonical(a, status);
852 FloatParts pb = float32_unpack_canonical(b, status);
853 FloatParts pr = mul_floats(pa, pb, status);
854
855 return float32_round_pack_canonical(pr, status);
856}
857
858float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
859 float_status *status)
860{
861 FloatParts pa = float64_unpack_canonical(a, status);
862 FloatParts pb = float64_unpack_canonical(b, status);
863 FloatParts pr = mul_floats(pa, pb, status);
864
865 return float64_round_pack_canonical(pr, status);
866}
867
d446830a
AB
868/*
869 * Returns the result of multiplying the floating-point values `a' and
870 * `b' then adding 'c', with no intermediate rounding step after the
871 * multiplication. The operation is performed according to the
872 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
873 * The flags argument allows the caller to select negation of the
874 * addend, the intermediate product, or the final result. (The
875 * difference between this and having the caller do a separate
876 * negation is that negating externally will flip the sign bit on
877 * NaNs.)
878 */
879
880static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
881 int flags, float_status *s)
882{
883 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
884 ((1 << float_class_inf) | (1 << float_class_zero));
885 bool p_sign;
886 bool sign_flip = flags & float_muladd_negate_result;
887 FloatClass p_class;
888 uint64_t hi, lo;
889 int p_exp;
890
891 /* It is implementation-defined whether the cases of (0,inf,qnan)
892 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
893 * they return if they do), so we have to hand this information
894 * off to the target-specific pick-a-NaN routine.
895 */
896 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
897 return pick_nan_muladd(a, b, c, inf_zero, s);
898 }
899
900 if (inf_zero) {
901 s->float_exception_flags |= float_flag_invalid;
f7e598e2 902 return parts_default_nan(s);
d446830a
AB
903 }
904
905 if (flags & float_muladd_negate_c) {
906 c.sign ^= 1;
907 }
908
909 p_sign = a.sign ^ b.sign;
910
911 if (flags & float_muladd_negate_product) {
912 p_sign ^= 1;
913 }
914
915 if (a.cls == float_class_inf || b.cls == float_class_inf) {
916 p_class = float_class_inf;
917 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
918 p_class = float_class_zero;
919 } else {
920 p_class = float_class_normal;
921 }
922
923 if (c.cls == float_class_inf) {
924 if (p_class == float_class_inf && p_sign != c.sign) {
925 s->float_exception_flags |= float_flag_invalid;
f7e598e2 926 return parts_default_nan(s);
d446830a
AB
927 } else {
928 a.cls = float_class_inf;
929 a.sign = c.sign ^ sign_flip;
f7e598e2 930 return a;
d446830a 931 }
d446830a
AB
932 }
933
934 if (p_class == float_class_inf) {
935 a.cls = float_class_inf;
936 a.sign = p_sign ^ sign_flip;
937 return a;
938 }
939
940 if (p_class == float_class_zero) {
941 if (c.cls == float_class_zero) {
942 if (p_sign != c.sign) {
943 p_sign = s->float_rounding_mode == float_round_down;
944 }
945 c.sign = p_sign;
946 } else if (flags & float_muladd_halve_result) {
947 c.exp -= 1;
948 }
949 c.sign ^= sign_flip;
950 return c;
951 }
952
953 /* a & b should be normals now... */
954 assert(a.cls == float_class_normal &&
955 b.cls == float_class_normal);
956
957 p_exp = a.exp + b.exp;
958
959 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
960 * result.
961 */
962 mul64To128(a.frac, b.frac, &hi, &lo);
963 /* binary point now at bit 124 */
964
965 /* check for overflow */
966 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
967 shift128RightJamming(hi, lo, 1, &hi, &lo);
968 p_exp += 1;
969 }
970
971 /* + add/sub */
972 if (c.cls == float_class_zero) {
973 /* move binary point back to 62 */
974 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
975 } else {
976 int exp_diff = p_exp - c.exp;
977 if (p_sign == c.sign) {
978 /* Addition */
979 if (exp_diff <= 0) {
980 shift128RightJamming(hi, lo,
981 DECOMPOSED_BINARY_POINT - exp_diff,
982 &hi, &lo);
983 lo += c.frac;
984 p_exp = c.exp;
985 } else {
986 uint64_t c_hi, c_lo;
987 /* shift c to the same binary point as the product (124) */
988 c_hi = c.frac >> 2;
989 c_lo = 0;
990 shift128RightJamming(c_hi, c_lo,
991 exp_diff,
992 &c_hi, &c_lo);
993 add128(hi, lo, c_hi, c_lo, &hi, &lo);
994 /* move binary point back to 62 */
995 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
996 }
997
998 if (lo & DECOMPOSED_OVERFLOW_BIT) {
999 shift64RightJamming(lo, 1, &lo);
1000 p_exp += 1;
1001 }
1002
1003 } else {
1004 /* Subtraction */
1005 uint64_t c_hi, c_lo;
1006 /* make C binary point match product at bit 124 */
1007 c_hi = c.frac >> 2;
1008 c_lo = 0;
1009
1010 if (exp_diff <= 0) {
1011 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1012 if (exp_diff == 0
1013 &&
1014 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1015 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1016 } else {
1017 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1018 p_sign ^= 1;
1019 p_exp = c.exp;
1020 }
1021 } else {
1022 shift128RightJamming(c_hi, c_lo,
1023 exp_diff,
1024 &c_hi, &c_lo);
1025 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1026 }
1027
1028 if (hi == 0 && lo == 0) {
1029 a.cls = float_class_zero;
1030 a.sign = s->float_rounding_mode == float_round_down;
1031 a.sign ^= sign_flip;
1032 return a;
1033 } else {
1034 int shift;
1035 if (hi != 0) {
1036 shift = clz64(hi);
1037 } else {
1038 shift = clz64(lo) + 64;
1039 }
1040 /* Normalizing to a binary point of 124 is the
1041 correct adjust for the exponent. However since we're
1042 shifting, we might as well put the binary point back
1043 at 62 where we really want it. Therefore shift as
1044 if we're leaving 1 bit at the top of the word, but
1045 adjust the exponent as if we're leaving 3 bits. */
1046 shift -= 1;
1047 if (shift >= 64) {
1048 lo = lo << (shift - 64);
1049 } else {
1050 hi = (hi << shift) | (lo >> (64 - shift));
1051 lo = hi | ((lo << shift) != 0);
1052 }
1053 p_exp -= shift - 2;
1054 }
1055 }
1056 }
1057
1058 if (flags & float_muladd_halve_result) {
1059 p_exp -= 1;
1060 }
1061
1062 /* finally prepare our result */
1063 a.cls = float_class_normal;
1064 a.sign = p_sign ^ sign_flip;
1065 a.exp = p_exp;
1066 a.frac = lo;
1067
1068 return a;
1069}
1070
1071float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
1072 int flags, float_status *status)
1073{
1074 FloatParts pa = float16_unpack_canonical(a, status);
1075 FloatParts pb = float16_unpack_canonical(b, status);
1076 FloatParts pc = float16_unpack_canonical(c, status);
1077 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1078
1079 return float16_round_pack_canonical(pr, status);
1080}
1081
1082float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
1083 int flags, float_status *status)
1084{
1085 FloatParts pa = float32_unpack_canonical(a, status);
1086 FloatParts pb = float32_unpack_canonical(b, status);
1087 FloatParts pc = float32_unpack_canonical(c, status);
1088 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1089
1090 return float32_round_pack_canonical(pr, status);
1091}
1092
1093float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
1094 int flags, float_status *status)
1095{
1096 FloatParts pa = float64_unpack_canonical(a, status);
1097 FloatParts pb = float64_unpack_canonical(b, status);
1098 FloatParts pc = float64_unpack_canonical(c, status);
1099 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1100
1101 return float64_round_pack_canonical(pr, status);
1102}
1103
cf07323d
AB
1104/*
1105 * Returns the result of dividing the floating-point value `a' by the
1106 * corresponding value `b'. The operation is performed according to
1107 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1108 */
1109
1110static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1111{
1112 bool sign = a.sign ^ b.sign;
1113
1114 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1115 uint64_t n0, n1, q, r;
cf07323d 1116 int exp = a.exp - b.exp;
5dfbc9e4
RH
1117
1118 /*
1119 * We want a 2*N / N-bit division to produce exactly an N-bit
1120 * result, so that we do not lose any precision and so that we
1121 * do not have to renormalize afterward. If A.frac < B.frac,
1122 * then division would produce an (N-1)-bit result; shift A left
1123 * by one to produce the an N-bit result, and decrement the
1124 * exponent to match.
1125 *
1126 * The udiv_qrnnd algorithm that we're using requires normalization,
1127 * i.e. the msb of the denominator must be set. Since we know that
1128 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1129 * by one (more), and the remainder must be shifted right by one.
1130 */
cf07323d
AB
1131 if (a.frac < b.frac) {
1132 exp -= 1;
5dfbc9e4 1133 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
cf07323d 1134 } else {
5dfbc9e4 1135 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
cf07323d 1136 }
5dfbc9e4
RH
1137 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1138
1139 /*
1140 * Set lsb if there is a remainder, to set inexact.
1141 * As mentioned above, to find the actual value of the remainder we
1142 * would need to shift right, but (1) we are only concerned about
1143 * non-zero-ness, and (2) the remainder will always be even because
1144 * both inputs to the division primitive are even.
1145 */
1146 a.frac = q | (r != 0);
cf07323d
AB
1147 a.sign = sign;
1148 a.exp = exp;
1149 return a;
1150 }
1151 /* handle all the NaN cases */
1152 if (is_nan(a.cls) || is_nan(b.cls)) {
1153 return pick_nan(a, b, s);
1154 }
1155 /* 0/0 or Inf/Inf */
1156 if (a.cls == b.cls
1157 &&
1158 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1159 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1160 return parts_default_nan(s);
cf07323d 1161 }
9cb4e398
AB
1162 /* Inf / x or 0 / x */
1163 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1164 a.sign = sign;
1165 return a;
1166 }
cf07323d
AB
1167 /* Div 0 => Inf */
1168 if (b.cls == float_class_zero) {
1169 s->float_exception_flags |= float_flag_divbyzero;
1170 a.cls = float_class_inf;
1171 a.sign = sign;
1172 return a;
1173 }
cf07323d
AB
1174 /* Div by Inf */
1175 if (b.cls == float_class_inf) {
1176 a.cls = float_class_zero;
1177 a.sign = sign;
1178 return a;
1179 }
1180 g_assert_not_reached();
1181}
1182
1183float16 float16_div(float16 a, float16 b, float_status *status)
1184{
1185 FloatParts pa = float16_unpack_canonical(a, status);
1186 FloatParts pb = float16_unpack_canonical(b, status);
1187 FloatParts pr = div_floats(pa, pb, status);
1188
1189 return float16_round_pack_canonical(pr, status);
1190}
1191
1192float32 float32_div(float32 a, float32 b, float_status *status)
1193{
1194 FloatParts pa = float32_unpack_canonical(a, status);
1195 FloatParts pb = float32_unpack_canonical(b, status);
1196 FloatParts pr = div_floats(pa, pb, status);
1197
1198 return float32_round_pack_canonical(pr, status);
1199}
1200
1201float64 float64_div(float64 a, float64 b, float_status *status)
1202{
1203 FloatParts pa = float64_unpack_canonical(a, status);
1204 FloatParts pb = float64_unpack_canonical(b, status);
1205 FloatParts pr = div_floats(pa, pb, status);
1206
1207 return float64_round_pack_canonical(pr, status);
1208}
1209
6fed16b2
AB
1210/*
1211 * Float to Float conversions
1212 *
1213 * Returns the result of converting one float format to another. The
1214 * conversion is performed according to the IEC/IEEE Standard for
1215 * Binary Floating-Point Arithmetic.
1216 *
1217 * The float_to_float helper only needs to take care of raising
1218 * invalid exceptions and handling the conversion on NaNs.
1219 */
1220
1221static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1222 float_status *s)
1223{
1224 if (dstf->arm_althp) {
1225 switch (a.cls) {
1226 case float_class_qnan:
1227 case float_class_snan:
1228 /* There is no NaN in the destination format. Raise Invalid
1229 * and return a zero with the sign of the input NaN.
1230 */
1231 s->float_exception_flags |= float_flag_invalid;
1232 a.cls = float_class_zero;
1233 a.frac = 0;
1234 a.exp = 0;
1235 break;
1236
1237 case float_class_inf:
1238 /* There is no Inf in the destination format. Raise Invalid
1239 * and return the maximum normal with the correct sign.
1240 */
1241 s->float_exception_flags |= float_flag_invalid;
1242 a.cls = float_class_normal;
1243 a.exp = dstf->exp_max;
1244 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1245 break;
1246
1247 default:
1248 break;
1249 }
1250 } else if (is_nan(a.cls)) {
1251 if (is_snan(a.cls)) {
1252 s->float_exception_flags |= float_flag_invalid;
1253 a = parts_silence_nan(a, s);
1254 }
1255 if (s->default_nan_mode) {
1256 return parts_default_nan(s);
1257 }
1258 }
1259 return a;
1260}
1261
1262float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1263{
1264 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1265 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1266 FloatParts pr = float_to_float(p, &float32_params, s);
1267 return float32_round_pack_canonical(pr, s);
1268}
1269
1270float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1271{
1272 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1273 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1274 FloatParts pr = float_to_float(p, &float64_params, s);
1275 return float64_round_pack_canonical(pr, s);
1276}
1277
1278float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1279{
1280 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1281 FloatParts p = float32_unpack_canonical(a, s);
1282 FloatParts pr = float_to_float(p, fmt16, s);
1283 return float16a_round_pack_canonical(pr, s, fmt16);
1284}
1285
1286float64 float32_to_float64(float32 a, float_status *s)
1287{
1288 FloatParts p = float32_unpack_canonical(a, s);
1289 FloatParts pr = float_to_float(p, &float64_params, s);
1290 return float64_round_pack_canonical(pr, s);
1291}
1292
1293float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1294{
1295 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1296 FloatParts p = float64_unpack_canonical(a, s);
1297 FloatParts pr = float_to_float(p, fmt16, s);
1298 return float16a_round_pack_canonical(pr, s, fmt16);
1299}
1300
1301float32 float64_to_float32(float64 a, float_status *s)
1302{
1303 FloatParts p = float64_unpack_canonical(a, s);
1304 FloatParts pr = float_to_float(p, &float32_params, s);
1305 return float32_round_pack_canonical(pr, s);
1306}
1307
dbe4d53a
AB
1308/*
1309 * Rounds the floating-point value `a' to an integer, and returns the
1310 * result as a floating-point value. The operation is performed
1311 * according to the IEC/IEEE Standard for Binary Floating-Point
1312 * Arithmetic.
1313 */
1314
2f6c74be
RH
1315static FloatParts round_to_int(FloatParts a, int rmode,
1316 int scale, float_status *s)
dbe4d53a 1317{
2f6c74be
RH
1318 switch (a.cls) {
1319 case float_class_qnan:
1320 case float_class_snan:
dbe4d53a 1321 return return_nan(a, s);
dbe4d53a 1322
dbe4d53a
AB
1323 case float_class_zero:
1324 case float_class_inf:
dbe4d53a
AB
1325 /* already "integral" */
1326 break;
2f6c74be 1327
dbe4d53a 1328 case float_class_normal:
2f6c74be
RH
1329 scale = MIN(MAX(scale, -0x10000), 0x10000);
1330 a.exp += scale;
1331
dbe4d53a
AB
1332 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1333 /* already integral */
1334 break;
1335 }
1336 if (a.exp < 0) {
1337 bool one;
1338 /* all fractional */
1339 s->float_exception_flags |= float_flag_inexact;
2f6c74be 1340 switch (rmode) {
dbe4d53a
AB
1341 case float_round_nearest_even:
1342 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1343 break;
1344 case float_round_ties_away:
1345 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1346 break;
1347 case float_round_to_zero:
1348 one = false;
1349 break;
1350 case float_round_up:
1351 one = !a.sign;
1352 break;
1353 case float_round_down:
1354 one = a.sign;
1355 break;
1356 default:
1357 g_assert_not_reached();
1358 }
1359
1360 if (one) {
1361 a.frac = DECOMPOSED_IMPLICIT_BIT;
1362 a.exp = 0;
1363 } else {
1364 a.cls = float_class_zero;
1365 }
1366 } else {
1367 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1368 uint64_t frac_lsbm1 = frac_lsb >> 1;
1369 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1370 uint64_t rnd_mask = rnd_even_mask >> 1;
1371 uint64_t inc;
1372
2f6c74be 1373 switch (rmode) {
dbe4d53a
AB
1374 case float_round_nearest_even:
1375 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1376 break;
1377 case float_round_ties_away:
1378 inc = frac_lsbm1;
1379 break;
1380 case float_round_to_zero:
1381 inc = 0;
1382 break;
1383 case float_round_up:
1384 inc = a.sign ? 0 : rnd_mask;
1385 break;
1386 case float_round_down:
1387 inc = a.sign ? rnd_mask : 0;
1388 break;
1389 default:
1390 g_assert_not_reached();
1391 }
1392
1393 if (a.frac & rnd_mask) {
1394 s->float_exception_flags |= float_flag_inexact;
1395 a.frac += inc;
1396 a.frac &= ~rnd_mask;
1397 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1398 a.frac >>= 1;
1399 a.exp++;
1400 }
1401 }
1402 }
1403 break;
1404 default:
1405 g_assert_not_reached();
1406 }
1407 return a;
1408}
1409
1410float16 float16_round_to_int(float16 a, float_status *s)
1411{
1412 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 1413 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1414 return float16_round_pack_canonical(pr, s);
1415}
1416
1417float32 float32_round_to_int(float32 a, float_status *s)
1418{
1419 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 1420 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1421 return float32_round_pack_canonical(pr, s);
1422}
1423
1424float64 float64_round_to_int(float64 a, float_status *s)
1425{
1426 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 1427 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1428 return float64_round_pack_canonical(pr, s);
1429}
1430
ab52f973
AB
1431/*
1432 * Returns the result of converting the floating-point value `a' to
1433 * the two's complement integer format. The conversion is performed
1434 * according to the IEC/IEEE Standard for Binary Floating-Point
1435 * Arithmetic---which means in particular that the conversion is
1436 * rounded according to the current rounding mode. If `a' is a NaN,
1437 * the largest positive integer is returned. Otherwise, if the
1438 * conversion overflows, the largest integer with the same sign as `a'
1439 * is returned.
1440*/
1441
2f6c74be 1442static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
ab52f973
AB
1443 int64_t min, int64_t max,
1444 float_status *s)
1445{
1446 uint64_t r;
1447 int orig_flags = get_float_exception_flags(s);
2f6c74be 1448 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
1449
1450 switch (p.cls) {
1451 case float_class_snan:
1452 case float_class_qnan:
801bc563 1453 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1454 return max;
1455 case float_class_inf:
801bc563 1456 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1457 return p.sign ? min : max;
1458 case float_class_zero:
1459 return 0;
1460 case float_class_normal:
1461 if (p.exp < DECOMPOSED_BINARY_POINT) {
1462 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1463 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1464 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1465 } else {
1466 r = UINT64_MAX;
1467 }
1468 if (p.sign) {
33358375 1469 if (r <= -(uint64_t) min) {
ab52f973
AB
1470 return -r;
1471 } else {
1472 s->float_exception_flags = orig_flags | float_flag_invalid;
1473 return min;
1474 }
1475 } else {
33358375 1476 if (r <= max) {
ab52f973
AB
1477 return r;
1478 } else {
1479 s->float_exception_flags = orig_flags | float_flag_invalid;
1480 return max;
1481 }
1482 }
1483 default:
1484 g_assert_not_reached();
1485 }
1486}
1487
2f6c74be
RH
1488int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
1489 float_status *s)
1490{
1491 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1492 rmode, scale, INT16_MIN, INT16_MAX, s);
1493}
1494
1495int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
1496 float_status *s)
1497{
1498 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1499 rmode, scale, INT32_MIN, INT32_MAX, s);
1500}
1501
1502int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
1503 float_status *s)
1504{
1505 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1506 rmode, scale, INT64_MIN, INT64_MAX, s);
1507}
1508
1509int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
1510 float_status *s)
1511{
1512 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1513 rmode, scale, INT16_MIN, INT16_MAX, s);
1514}
1515
1516int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
1517 float_status *s)
1518{
1519 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1520 rmode, scale, INT32_MIN, INT32_MAX, s);
1521}
1522
1523int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
1524 float_status *s)
1525{
1526 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1527 rmode, scale, INT64_MIN, INT64_MAX, s);
1528}
1529
1530int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
1531 float_status *s)
1532{
1533 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1534 rmode, scale, INT16_MIN, INT16_MAX, s);
1535}
1536
1537int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
1538 float_status *s)
1539{
1540 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1541 rmode, scale, INT32_MIN, INT32_MAX, s);
1542}
1543
1544int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
1545 float_status *s)
1546{
1547 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1548 rmode, scale, INT64_MIN, INT64_MAX, s);
1549}
1550
1551int16_t float16_to_int16(float16 a, float_status *s)
1552{
1553 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1554}
1555
1556int32_t float16_to_int32(float16 a, float_status *s)
1557{
1558 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1559}
1560
1561int64_t float16_to_int64(float16 a, float_status *s)
1562{
1563 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1564}
1565
1566int16_t float32_to_int16(float32 a, float_status *s)
1567{
1568 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1569}
1570
1571int32_t float32_to_int32(float32 a, float_status *s)
1572{
1573 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1574}
1575
1576int64_t float32_to_int64(float32 a, float_status *s)
1577{
1578 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1579}
1580
1581int16_t float64_to_int16(float64 a, float_status *s)
1582{
1583 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1584}
1585
1586int32_t float64_to_int32(float64 a, float_status *s)
1587{
1588 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1589}
1590
1591int64_t float64_to_int64(float64 a, float_status *s)
1592{
1593 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1594}
1595
1596int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
1597{
1598 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
1599}
1600
1601int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
1602{
1603 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
1604}
1605
1606int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
1607{
1608 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
1609}
1610
2f6c74be
RH
1611int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
1612{
1613 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
1614}
ab52f973 1615
2f6c74be
RH
1616int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
1617{
1618 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
1619}
1620
1621int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
1622{
1623 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
1624}
1625
1626int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
1627{
1628 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
1629}
ab52f973 1630
2f6c74be
RH
1631int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
1632{
1633 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
1634}
ab52f973 1635
2f6c74be
RH
1636int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
1637{
1638 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
1639}
ab52f973
AB
1640
1641/*
1642 * Returns the result of converting the floating-point value `a' to
1643 * the unsigned integer format. The conversion is performed according
1644 * to the IEC/IEEE Standard for Binary Floating-Point
1645 * Arithmetic---which means in particular that the conversion is
1646 * rounded according to the current rounding mode. If `a' is a NaN,
1647 * the largest unsigned integer is returned. Otherwise, if the
1648 * conversion overflows, the largest unsigned integer is returned. If
1649 * the 'a' is negative, the result is rounded and zero is returned;
1650 * values that do not round to zero will raise the inexact exception
1651 * flag.
1652 */
1653
2f6c74be
RH
1654static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
1655 uint64_t max, float_status *s)
ab52f973
AB
1656{
1657 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
1658 FloatParts p = round_to_int(in, rmode, scale, s);
1659 uint64_t r;
ab52f973
AB
1660
1661 switch (p.cls) {
1662 case float_class_snan:
1663 case float_class_qnan:
1664 s->float_exception_flags = orig_flags | float_flag_invalid;
1665 return max;
1666 case float_class_inf:
801bc563 1667 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1668 return p.sign ? 0 : max;
1669 case float_class_zero:
1670 return 0;
1671 case float_class_normal:
ab52f973
AB
1672 if (p.sign) {
1673 s->float_exception_flags = orig_flags | float_flag_invalid;
1674 return 0;
1675 }
1676
1677 if (p.exp < DECOMPOSED_BINARY_POINT) {
1678 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1679 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1680 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1681 } else {
1682 s->float_exception_flags = orig_flags | float_flag_invalid;
1683 return max;
1684 }
1685
1686 /* For uint64 this will never trip, but if p.exp is too large
1687 * to shift a decomposed fraction we shall have exited via the
1688 * 3rd leg above.
1689 */
1690 if (r > max) {
1691 s->float_exception_flags = orig_flags | float_flag_invalid;
1692 return max;
ab52f973 1693 }
2f6c74be 1694 return r;
ab52f973
AB
1695 default:
1696 g_assert_not_reached();
1697 }
1698}
1699
2f6c74be
RH
1700uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
1701 float_status *s)
1702{
1703 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1704 rmode, scale, UINT16_MAX, s);
1705}
1706
1707uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
1708 float_status *s)
1709{
1710 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1711 rmode, scale, UINT32_MAX, s);
1712}
1713
1714uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
1715 float_status *s)
1716{
1717 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1718 rmode, scale, UINT64_MAX, s);
1719}
1720
1721uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
1722 float_status *s)
1723{
1724 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1725 rmode, scale, UINT16_MAX, s);
1726}
1727
1728uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
1729 float_status *s)
1730{
1731 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1732 rmode, scale, UINT32_MAX, s);
1733}
1734
1735uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
1736 float_status *s)
1737{
1738 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1739 rmode, scale, UINT64_MAX, s);
1740}
1741
1742uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
1743 float_status *s)
1744{
1745 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1746 rmode, scale, UINT16_MAX, s);
1747}
1748
1749uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
1750 float_status *s)
1751{
1752 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1753 rmode, scale, UINT32_MAX, s);
1754}
1755
1756uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
1757 float_status *s)
1758{
1759 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1760 rmode, scale, UINT64_MAX, s);
1761}
1762
1763uint16_t float16_to_uint16(float16 a, float_status *s)
1764{
1765 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1766}
1767
1768uint32_t float16_to_uint32(float16 a, float_status *s)
1769{
1770 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1771}
1772
1773uint64_t float16_to_uint64(float16 a, float_status *s)
1774{
1775 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1776}
1777
1778uint16_t float32_to_uint16(float32 a, float_status *s)
1779{
1780 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1781}
1782
1783uint32_t float32_to_uint32(float32 a, float_status *s)
1784{
1785 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1786}
1787
1788uint64_t float32_to_uint64(float32 a, float_status *s)
1789{
1790 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1791}
1792
1793uint16_t float64_to_uint16(float64 a, float_status *s)
1794{
1795 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1796}
1797
1798uint32_t float64_to_uint32(float64 a, float_status *s)
1799{
1800 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1801}
1802
1803uint64_t float64_to_uint64(float64 a, float_status *s)
1804{
1805 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1806}
1807
1808uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
1809{
1810 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1811}
1812
1813uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
1814{
1815 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1816}
1817
1818uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
1819{
1820 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1821}
1822
1823uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
1824{
1825 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1826}
1827
1828uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
1829{
1830 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1831}
1832
1833uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
1834{
1835 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1836}
1837
1838uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
1839{
1840 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1841}
1842
1843uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
1844{
1845 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1846}
1847
1848uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
1849{
1850 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1851}
ab52f973 1852
c02e1fb8
AB
1853/*
1854 * Integer to float conversions
1855 *
1856 * Returns the result of converting the two's complement integer `a'
1857 * to the floating-point format. The conversion is performed according
1858 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1859 */
1860
2abdfe24 1861static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 1862{
2abdfe24
RH
1863 FloatParts r = { .sign = false };
1864
c02e1fb8
AB
1865 if (a == 0) {
1866 r.cls = float_class_zero;
c02e1fb8 1867 } else {
2abdfe24
RH
1868 uint64_t f = a;
1869 int shift;
1870
1871 r.cls = float_class_normal;
c02e1fb8 1872 if (a < 0) {
2abdfe24 1873 f = -f;
c02e1fb8 1874 r.sign = true;
c02e1fb8 1875 }
2abdfe24
RH
1876 shift = clz64(f) - 1;
1877 scale = MIN(MAX(scale, -0x10000), 0x10000);
1878
1879 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
1880 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
1881 }
1882
1883 return r;
1884}
1885
2abdfe24 1886float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1887{
2abdfe24 1888 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1889 return float16_round_pack_canonical(pa, status);
1890}
1891
2abdfe24
RH
1892float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
1893{
1894 return int64_to_float16_scalbn(a, scale, status);
1895}
1896
1897float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
1898{
1899 return int64_to_float16_scalbn(a, scale, status);
1900}
1901
1902float16 int64_to_float16(int64_t a, float_status *status)
1903{
1904 return int64_to_float16_scalbn(a, 0, status);
1905}
1906
c02e1fb8
AB
1907float16 int32_to_float16(int32_t a, float_status *status)
1908{
2abdfe24 1909 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
1910}
1911
1912float16 int16_to_float16(int16_t a, float_status *status)
1913{
2abdfe24 1914 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
1915}
1916
2abdfe24 1917float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1918{
2abdfe24 1919 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1920 return float32_round_pack_canonical(pa, status);
1921}
1922
2abdfe24
RH
1923float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
1924{
1925 return int64_to_float32_scalbn(a, scale, status);
1926}
1927
1928float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
1929{
1930 return int64_to_float32_scalbn(a, scale, status);
1931}
1932
1933float32 int64_to_float32(int64_t a, float_status *status)
1934{
1935 return int64_to_float32_scalbn(a, 0, status);
1936}
1937
c02e1fb8
AB
1938float32 int32_to_float32(int32_t a, float_status *status)
1939{
2abdfe24 1940 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
1941}
1942
1943float32 int16_to_float32(int16_t a, float_status *status)
1944{
2abdfe24 1945 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
1946}
1947
2abdfe24 1948float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1949{
2abdfe24 1950 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1951 return float64_round_pack_canonical(pa, status);
1952}
1953
2abdfe24
RH
1954float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
1955{
1956 return int64_to_float64_scalbn(a, scale, status);
1957}
1958
1959float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
1960{
1961 return int64_to_float64_scalbn(a, scale, status);
1962}
1963
1964float64 int64_to_float64(int64_t a, float_status *status)
1965{
1966 return int64_to_float64_scalbn(a, 0, status);
1967}
1968
c02e1fb8
AB
1969float64 int32_to_float64(int32_t a, float_status *status)
1970{
2abdfe24 1971 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
1972}
1973
1974float64 int16_to_float64(int16_t a, float_status *status)
1975{
2abdfe24 1976 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
1977}
1978
1979
1980/*
1981 * Unsigned Integer to float conversions
1982 *
1983 * Returns the result of converting the unsigned integer `a' to the
1984 * floating-point format. The conversion is performed according to the
1985 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1986 */
1987
2abdfe24 1988static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 1989{
2abdfe24 1990 FloatParts r = { .sign = false };
c02e1fb8
AB
1991
1992 if (a == 0) {
1993 r.cls = float_class_zero;
1994 } else {
2abdfe24 1995 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 1996 r.cls = float_class_normal;
2abdfe24
RH
1997 if ((int64_t)a < 0) {
1998 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
1999 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
2000 r.frac = a;
2001 } else {
2abdfe24
RH
2002 int shift = clz64(a) - 1;
2003 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2004 r.frac = a << shift;
c02e1fb8
AB
2005 }
2006 }
2007
2008 return r;
2009}
2010
2abdfe24 2011float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2012{
2abdfe24 2013 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2014 return float16_round_pack_canonical(pa, status);
2015}
2016
2abdfe24
RH
2017float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2018{
2019 return uint64_to_float16_scalbn(a, scale, status);
2020}
2021
2022float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2023{
2024 return uint64_to_float16_scalbn(a, scale, status);
2025}
2026
2027float16 uint64_to_float16(uint64_t a, float_status *status)
2028{
2029 return uint64_to_float16_scalbn(a, 0, status);
2030}
2031
c02e1fb8
AB
2032float16 uint32_to_float16(uint32_t a, float_status *status)
2033{
2abdfe24 2034 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2035}
2036
2037float16 uint16_to_float16(uint16_t a, float_status *status)
2038{
2abdfe24 2039 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2040}
2041
2abdfe24 2042float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2043{
2abdfe24 2044 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2045 return float32_round_pack_canonical(pa, status);
2046}
2047
2abdfe24
RH
2048float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2049{
2050 return uint64_to_float32_scalbn(a, scale, status);
2051}
2052
2053float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2054{
2055 return uint64_to_float32_scalbn(a, scale, status);
2056}
2057
2058float32 uint64_to_float32(uint64_t a, float_status *status)
2059{
2060 return uint64_to_float32_scalbn(a, 0, status);
2061}
2062
c02e1fb8
AB
2063float32 uint32_to_float32(uint32_t a, float_status *status)
2064{
2abdfe24 2065 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2066}
2067
2068float32 uint16_to_float32(uint16_t a, float_status *status)
2069{
2abdfe24 2070 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2071}
2072
2abdfe24 2073float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2074{
2abdfe24 2075 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2076 return float64_round_pack_canonical(pa, status);
2077}
2078
2abdfe24
RH
2079float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2080{
2081 return uint64_to_float64_scalbn(a, scale, status);
2082}
2083
2084float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2085{
2086 return uint64_to_float64_scalbn(a, scale, status);
2087}
2088
2089float64 uint64_to_float64(uint64_t a, float_status *status)
2090{
2091 return uint64_to_float64_scalbn(a, 0, status);
2092}
2093
c02e1fb8
AB
2094float64 uint32_to_float64(uint32_t a, float_status *status)
2095{
2abdfe24 2096 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2097}
2098
2099float64 uint16_to_float64(uint16_t a, float_status *status)
2100{
2abdfe24 2101 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2102}
2103
89360067
AB
2104/* Float Min/Max */
2105/* min() and max() functions. These can't be implemented as
2106 * 'compare and pick one input' because that would mishandle
2107 * NaNs and +0 vs -0.
2108 *
2109 * minnum() and maxnum() functions. These are similar to the min()
2110 * and max() functions but if one of the arguments is a QNaN and
2111 * the other is numerical then the numerical argument is returned.
2112 * SNaNs will get quietened before being returned.
2113 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
2114 * and maxNum() operations. min() and max() are the typical min/max
2115 * semantics provided by many CPUs which predate that specification.
2116 *
2117 * minnummag() and maxnummag() functions correspond to minNumMag()
2118 * and maxNumMag() from the IEEE-754 2008.
2119 */
2120static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2121 bool ieee, bool ismag, float_status *s)
2122{
2123 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2124 if (ieee) {
2125 /* Takes two floating-point values `a' and `b', one of
2126 * which is a NaN, and returns the appropriate NaN
2127 * result. If either `a' or `b' is a signaling NaN,
2128 * the invalid exception is raised.
2129 */
2130 if (is_snan(a.cls) || is_snan(b.cls)) {
2131 return pick_nan(a, b, s);
2132 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2133 return b;
2134 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2135 return a;
2136 }
2137 }
2138 return pick_nan(a, b, s);
2139 } else {
2140 int a_exp, b_exp;
89360067
AB
2141
2142 switch (a.cls) {
2143 case float_class_normal:
2144 a_exp = a.exp;
2145 break;
2146 case float_class_inf:
2147 a_exp = INT_MAX;
2148 break;
2149 case float_class_zero:
2150 a_exp = INT_MIN;
2151 break;
2152 default:
2153 g_assert_not_reached();
2154 break;
2155 }
2156 switch (b.cls) {
2157 case float_class_normal:
2158 b_exp = b.exp;
2159 break;
2160 case float_class_inf:
2161 b_exp = INT_MAX;
2162 break;
2163 case float_class_zero:
2164 b_exp = INT_MIN;
2165 break;
2166 default:
2167 g_assert_not_reached();
2168 break;
2169 }
2170
6245327a
EC
2171 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2172 bool a_less = a_exp < b_exp;
2173 if (a_exp == b_exp) {
2174 a_less = a.frac < b.frac;
2175 }
2176 return a_less ^ ismin ? b : a;
89360067
AB
2177 }
2178
6245327a 2179 if (a.sign == b.sign) {
89360067
AB
2180 bool a_less = a_exp < b_exp;
2181 if (a_exp == b_exp) {
2182 a_less = a.frac < b.frac;
2183 }
6245327a 2184 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2185 } else {
6245327a 2186 return a.sign ^ ismin ? b : a;
89360067
AB
2187 }
2188 }
2189}
2190
2191#define MINMAX(sz, name, ismin, isiee, ismag) \
2192float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2193 float_status *s) \
2194{ \
2195 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2196 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2197 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2198 \
2199 return float ## sz ## _round_pack_canonical(pr, s); \
2200}
2201
2202MINMAX(16, min, true, false, false)
2203MINMAX(16, minnum, true, true, false)
2204MINMAX(16, minnummag, true, true, true)
2205MINMAX(16, max, false, false, false)
2206MINMAX(16, maxnum, false, true, false)
2207MINMAX(16, maxnummag, false, true, true)
2208
2209MINMAX(32, min, true, false, false)
2210MINMAX(32, minnum, true, true, false)
2211MINMAX(32, minnummag, true, true, true)
2212MINMAX(32, max, false, false, false)
2213MINMAX(32, maxnum, false, true, false)
2214MINMAX(32, maxnummag, false, true, true)
2215
2216MINMAX(64, min, true, false, false)
2217MINMAX(64, minnum, true, true, false)
2218MINMAX(64, minnummag, true, true, true)
2219MINMAX(64, max, false, false, false)
2220MINMAX(64, maxnum, false, true, false)
2221MINMAX(64, maxnummag, false, true, true)
2222
2223#undef MINMAX
2224
0c4c9092
AB
2225/* Floating point compare */
2226static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2227 float_status *s)
2228{
2229 if (is_nan(a.cls) || is_nan(b.cls)) {
2230 if (!is_quiet ||
2231 a.cls == float_class_snan ||
2232 b.cls == float_class_snan) {
2233 s->float_exception_flags |= float_flag_invalid;
2234 }
2235 return float_relation_unordered;
2236 }
2237
2238 if (a.cls == float_class_zero) {
2239 if (b.cls == float_class_zero) {
2240 return float_relation_equal;
2241 }
2242 return b.sign ? float_relation_greater : float_relation_less;
2243 } else if (b.cls == float_class_zero) {
2244 return a.sign ? float_relation_less : float_relation_greater;
2245 }
2246
2247 /* The only really important thing about infinity is its sign. If
2248 * both are infinities the sign marks the smallest of the two.
2249 */
2250 if (a.cls == float_class_inf) {
2251 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2252 return float_relation_equal;
2253 }
2254 return a.sign ? float_relation_less : float_relation_greater;
2255 } else if (b.cls == float_class_inf) {
2256 return b.sign ? float_relation_greater : float_relation_less;
2257 }
2258
2259 if (a.sign != b.sign) {
2260 return a.sign ? float_relation_less : float_relation_greater;
2261 }
2262
2263 if (a.exp == b.exp) {
2264 if (a.frac == b.frac) {
2265 return float_relation_equal;
2266 }
2267 if (a.sign) {
2268 return a.frac > b.frac ?
2269 float_relation_less : float_relation_greater;
2270 } else {
2271 return a.frac > b.frac ?
2272 float_relation_greater : float_relation_less;
2273 }
2274 } else {
2275 if (a.sign) {
2276 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2277 } else {
2278 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2279 }
2280 }
2281}
2282
2283#define COMPARE(sz) \
2284int float ## sz ## _compare(float ## sz a, float ## sz b, \
2285 float_status *s) \
2286{ \
2287 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2288 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2289 return compare_floats(pa, pb, false, s); \
2290} \
2291int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
2292 float_status *s) \
2293{ \
2294 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2295 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2296 return compare_floats(pa, pb, true, s); \
2297}
2298
2299COMPARE(16)
2300COMPARE(32)
2301COMPARE(64)
2302
2303#undef COMPARE
2304
0bfc9f19
AB
2305/* Multiply A by 2 raised to the power N. */
2306static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
2307{
2308 if (unlikely(is_nan(a.cls))) {
2309 return return_nan(a, s);
2310 }
2311 if (a.cls == float_class_normal) {
ce8d4082
RH
2312 /* The largest float type (even though not supported by FloatParts)
2313 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
2314 * still allows rounding to infinity, without allowing overflow
2315 * within the int32_t that backs FloatParts.exp.
2316 */
2317 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
2318 a.exp += n;
2319 }
2320 return a;
2321}
2322
2323float16 float16_scalbn(float16 a, int n, float_status *status)
2324{
2325 FloatParts pa = float16_unpack_canonical(a, status);
2326 FloatParts pr = scalbn_decomposed(pa, n, status);
2327 return float16_round_pack_canonical(pr, status);
2328}
2329
2330float32 float32_scalbn(float32 a, int n, float_status *status)
2331{
2332 FloatParts pa = float32_unpack_canonical(a, status);
2333 FloatParts pr = scalbn_decomposed(pa, n, status);
2334 return float32_round_pack_canonical(pr, status);
2335}
2336
2337float64 float64_scalbn(float64 a, int n, float_status *status)
2338{
2339 FloatParts pa = float64_unpack_canonical(a, status);
2340 FloatParts pr = scalbn_decomposed(pa, n, status);
2341 return float64_round_pack_canonical(pr, status);
2342}
2343
c13bb2da
AB
2344/*
2345 * Square Root
2346 *
2347 * The old softfloat code did an approximation step before zeroing in
2348 * on the final result. However, for simplicity we just compute the
2349 * square root by iterating down from the implicit bit to enough extra
2350 * bits to ensure we get a correctly rounded result.
2351 *
2352 * This does mean however the calculation is slower than before,
2353 * especially for 64 bit floats.
2354 */
2355
2356static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
2357{
2358 uint64_t a_frac, r_frac, s_frac;
2359 int bit, last_bit;
2360
2361 if (is_nan(a.cls)) {
2362 return return_nan(a, s);
2363 }
2364 if (a.cls == float_class_zero) {
2365 return a; /* sqrt(+-0) = +-0 */
2366 }
2367 if (a.sign) {
2368 s->float_exception_flags |= float_flag_invalid;
f7e598e2 2369 return parts_default_nan(s);
c13bb2da
AB
2370 }
2371 if (a.cls == float_class_inf) {
2372 return a; /* sqrt(+inf) = +inf */
2373 }
2374
2375 assert(a.cls == float_class_normal);
2376
2377 /* We need two overflow bits at the top. Adding room for that is a
2378 * right shift. If the exponent is odd, we can discard the low bit
2379 * by multiplying the fraction by 2; that's a left shift. Combine
2380 * those and we shift right if the exponent is even.
2381 */
2382 a_frac = a.frac;
2383 if (!(a.exp & 1)) {
2384 a_frac >>= 1;
2385 }
2386 a.exp >>= 1;
2387
2388 /* Bit-by-bit computation of sqrt. */
2389 r_frac = 0;
2390 s_frac = 0;
2391
2392 /* Iterate from implicit bit down to the 3 extra bits to compute a
2393 * properly rounded result. Remember we've inserted one more bit
2394 * at the top, so these positions are one less.
2395 */
2396 bit = DECOMPOSED_BINARY_POINT - 1;
2397 last_bit = MAX(p->frac_shift - 4, 0);
2398 do {
2399 uint64_t q = 1ULL << bit;
2400 uint64_t t_frac = s_frac + q;
2401 if (t_frac <= a_frac) {
2402 s_frac = t_frac + q;
2403 a_frac -= t_frac;
2404 r_frac += q;
2405 }
2406 a_frac <<= 1;
2407 } while (--bit >= last_bit);
2408
2409 /* Undo the right shift done above. If there is any remaining
2410 * fraction, the result is inexact. Set the sticky bit.
2411 */
2412 a.frac = (r_frac << 1) + (a_frac != 0);
2413
2414 return a;
2415}
2416
2417float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
2418{
2419 FloatParts pa = float16_unpack_canonical(a, status);
2420 FloatParts pr = sqrt_float(pa, status, &float16_params);
2421 return float16_round_pack_canonical(pr, status);
2422}
2423
2424float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
2425{
2426 FloatParts pa = float32_unpack_canonical(a, status);
2427 FloatParts pr = sqrt_float(pa, status, &float32_params);
2428 return float32_round_pack_canonical(pr, status);
2429}
2430
2431float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
2432{
2433 FloatParts pa = float64_unpack_canonical(a, status);
2434 FloatParts pr = sqrt_float(pa, status, &float64_params);
2435 return float64_round_pack_canonical(pr, status);
2436}
2437
0218a16e
RH
2438/*----------------------------------------------------------------------------
2439| The pattern for a default generated NaN.
2440*----------------------------------------------------------------------------*/
2441
2442float16 float16_default_nan(float_status *status)
2443{
2444 FloatParts p = parts_default_nan(status);
2445 p.frac >>= float16_params.frac_shift;
2446 return float16_pack_raw(p);
2447}
2448
2449float32 float32_default_nan(float_status *status)
2450{
2451 FloatParts p = parts_default_nan(status);
2452 p.frac >>= float32_params.frac_shift;
2453 return float32_pack_raw(p);
2454}
2455
2456float64 float64_default_nan(float_status *status)
2457{
2458 FloatParts p = parts_default_nan(status);
2459 p.frac >>= float64_params.frac_shift;
2460 return float64_pack_raw(p);
2461}
2462
2463float128 float128_default_nan(float_status *status)
2464{
2465 FloatParts p = parts_default_nan(status);
2466 float128 r;
2467
2468 /* Extrapolate from the choices made by parts_default_nan to fill
2469 * in the quad-floating format. If the low bit is set, assume we
2470 * want to set all non-snan bits.
2471 */
2472 r.low = -(p.frac & 1);
2473 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
2474 r.high |= LIT64(0x7FFF000000000000);
2475 r.high |= (uint64_t)p.sign << 63;
2476
2477 return r;
2478}
c13bb2da 2479
158142c2 2480/*----------------------------------------------------------------------------
377ed926
RH
2481| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
2482*----------------------------------------------------------------------------*/
2483
2484float16 float16_silence_nan(float16 a, float_status *status)
2485{
2486 FloatParts p = float16_unpack_raw(a);
2487 p.frac <<= float16_params.frac_shift;
2488 p = parts_silence_nan(p, status);
2489 p.frac >>= float16_params.frac_shift;
2490 return float16_pack_raw(p);
2491}
2492
2493float32 float32_silence_nan(float32 a, float_status *status)
2494{
2495 FloatParts p = float32_unpack_raw(a);
2496 p.frac <<= float32_params.frac_shift;
2497 p = parts_silence_nan(p, status);
2498 p.frac >>= float32_params.frac_shift;
2499 return float32_pack_raw(p);
2500}
2501
2502float64 float64_silence_nan(float64 a, float_status *status)
2503{
2504 FloatParts p = float64_unpack_raw(a);
2505 p.frac <<= float64_params.frac_shift;
2506 p = parts_silence_nan(p, status);
2507 p.frac >>= float64_params.frac_shift;
2508 return float64_pack_raw(p);
2509}
2510
2511/*----------------------------------------------------------------------------
158142c2
FB
2512| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
2513| and 7, and returns the properly rounded 32-bit integer corresponding to the
2514| input. If `zSign' is 1, the input is negated before being converted to an
2515| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
2516| is simply rounded to an integer, with the inexact exception raised if the
2517| input cannot be represented exactly as an integer. However, if the fixed-
2518| point input is too large, the invalid exception is raised and the largest
2519| positive or negative integer is returned.
2520*----------------------------------------------------------------------------*/
2521
f4014512 2522static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 2523{
8f506c70 2524 int8_t roundingMode;
158142c2 2525 flag roundNearestEven;
8f506c70 2526 int8_t roundIncrement, roundBits;
760e1416 2527 int32_t z;
158142c2 2528
a2f2d288 2529 roundingMode = status->float_rounding_mode;
158142c2 2530 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2531 switch (roundingMode) {
2532 case float_round_nearest_even:
f9288a76 2533 case float_round_ties_away:
dc355b76
PM
2534 roundIncrement = 0x40;
2535 break;
2536 case float_round_to_zero:
2537 roundIncrement = 0;
2538 break;
2539 case float_round_up:
2540 roundIncrement = zSign ? 0 : 0x7f;
2541 break;
2542 case float_round_down:
2543 roundIncrement = zSign ? 0x7f : 0;
2544 break;
2545 default:
2546 abort();
158142c2
FB
2547 }
2548 roundBits = absZ & 0x7F;
2549 absZ = ( absZ + roundIncrement )>>7;
2550 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2551 z = absZ;
2552 if ( zSign ) z = - z;
2553 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 2554 float_raise(float_flag_invalid, status);
bb98fe42 2555 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 2556 }
a2f2d288
PM
2557 if (roundBits) {
2558 status->float_exception_flags |= float_flag_inexact;
2559 }
158142c2
FB
2560 return z;
2561
2562}
2563
2564/*----------------------------------------------------------------------------
2565| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2566| `absZ1', with binary point between bits 63 and 64 (between the input words),
2567| and returns the properly rounded 64-bit integer corresponding to the input.
2568| If `zSign' is 1, the input is negated before being converted to an integer.
2569| Ordinarily, the fixed-point input is simply rounded to an integer, with
2570| the inexact exception raised if the input cannot be represented exactly as
2571| an integer. However, if the fixed-point input is too large, the invalid
2572| exception is raised and the largest positive or negative integer is
2573| returned.
2574*----------------------------------------------------------------------------*/
2575
f42c2224 2576static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 2577 float_status *status)
158142c2 2578{
8f506c70 2579 int8_t roundingMode;
158142c2 2580 flag roundNearestEven, increment;
760e1416 2581 int64_t z;
158142c2 2582
a2f2d288 2583 roundingMode = status->float_rounding_mode;
158142c2 2584 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2585 switch (roundingMode) {
2586 case float_round_nearest_even:
f9288a76 2587 case float_round_ties_away:
dc355b76
PM
2588 increment = ((int64_t) absZ1 < 0);
2589 break;
2590 case float_round_to_zero:
2591 increment = 0;
2592 break;
2593 case float_round_up:
2594 increment = !zSign && absZ1;
2595 break;
2596 case float_round_down:
2597 increment = zSign && absZ1;
2598 break;
2599 default:
2600 abort();
158142c2
FB
2601 }
2602 if ( increment ) {
2603 ++absZ0;
2604 if ( absZ0 == 0 ) goto overflow;
bb98fe42 2605 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2606 }
2607 z = absZ0;
2608 if ( zSign ) z = - z;
2609 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2610 overflow:
ff32e16e 2611 float_raise(float_flag_invalid, status);
158142c2 2612 return
bb98fe42 2613 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
2614 : LIT64( 0x7FFFFFFFFFFFFFFF );
2615 }
a2f2d288
PM
2616 if (absZ1) {
2617 status->float_exception_flags |= float_flag_inexact;
2618 }
158142c2
FB
2619 return z;
2620
2621}
2622
fb3ea83a
TM
2623/*----------------------------------------------------------------------------
2624| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2625| `absZ1', with binary point between bits 63 and 64 (between the input words),
2626| and returns the properly rounded 64-bit unsigned integer corresponding to the
2627| input. Ordinarily, the fixed-point input is simply rounded to an integer,
2628| with the inexact exception raised if the input cannot be represented exactly
2629| as an integer. However, if the fixed-point input is too large, the invalid
2630| exception is raised and the largest unsigned integer is returned.
2631*----------------------------------------------------------------------------*/
2632
f42c2224 2633static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 2634 uint64_t absZ1, float_status *status)
fb3ea83a 2635{
8f506c70 2636 int8_t roundingMode;
fb3ea83a
TM
2637 flag roundNearestEven, increment;
2638
a2f2d288 2639 roundingMode = status->float_rounding_mode;
fb3ea83a 2640 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
2641 switch (roundingMode) {
2642 case float_round_nearest_even:
f9288a76 2643 case float_round_ties_away:
dc355b76
PM
2644 increment = ((int64_t)absZ1 < 0);
2645 break;
2646 case float_round_to_zero:
2647 increment = 0;
2648 break;
2649 case float_round_up:
2650 increment = !zSign && absZ1;
2651 break;
2652 case float_round_down:
2653 increment = zSign && absZ1;
2654 break;
2655 default:
2656 abort();
fb3ea83a
TM
2657 }
2658 if (increment) {
2659 ++absZ0;
2660 if (absZ0 == 0) {
ff32e16e 2661 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2662 return LIT64(0xFFFFFFFFFFFFFFFF);
2663 }
2664 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2665 }
2666
2667 if (zSign && absZ0) {
ff32e16e 2668 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2669 return 0;
2670 }
2671
2672 if (absZ1) {
a2f2d288 2673 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
2674 }
2675 return absZ0;
2676}
2677
37d18660
PM
2678/*----------------------------------------------------------------------------
2679| If `a' is denormal and we are in flush-to-zero mode then set the
2680| input-denormal exception and return zero. Otherwise just return the value.
2681*----------------------------------------------------------------------------*/
e5a41ffa 2682float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 2683{
a2f2d288 2684 if (status->flush_inputs_to_zero) {
37d18660 2685 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 2686 float_raise(float_flag_input_denormal, status);
37d18660
PM
2687 return make_float32(float32_val(a) & 0x80000000);
2688 }
2689 }
2690 return a;
2691}
2692
158142c2
FB
2693/*----------------------------------------------------------------------------
2694| Normalizes the subnormal single-precision floating-point value represented
2695| by the denormalized significand `aSig'. The normalized exponent and
2696| significand are stored at the locations pointed to by `zExpPtr' and
2697| `zSigPtr', respectively.
2698*----------------------------------------------------------------------------*/
2699
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift that moves the leading set bit of aSig up to bit 23. */
    const int8_t lshift = clz32(aSig) - 8;

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
2710
158142c2
FB
2711/*----------------------------------------------------------------------------
2712| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2713| and significand `zSig', and returns the proper single-precision floating-
2714| point value corresponding to the abstract input. Ordinarily, the abstract
2715| value is simply rounded and packed into the single-precision format, with
2716| the inexact exception raised if the abstract input cannot be represented
2717| exactly. However, if the abstract value is too large, the overflow and
2718| inexact exceptions are raised and an infinity or maximal finite value is
2719| returned. If the abstract value is too small, the input value is rounded to
2720| a subnormal number, and the underflow and inexact exceptions are raised if
2721| the abstract input cannot be represented exactly as a subnormal single-
2722| precision floating-point number.
2723| The input significand `zSig' has its binary point between bits 30
2724| and 29, which is 7 bits to the left of the usual location. This shifted
2725| significand must be normalized or smaller. If `zSig' is not normalized,
2726| `zExp' must be 0; in that case, the result returned is a subnormal number,
2727| and it must not require rounding. In the usual case that `zSig' is
2728| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2729| The handling of underflow and overflow follows the IEC/IEEE Standard for
2730| Binary Floating-Point Arithmetic.
2731*----------------------------------------------------------------------------*/
2732
0c48262d 2733static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2734 float_status *status)
158142c2 2735{
8f506c70 2736 int8_t roundingMode;
158142c2 2737 flag roundNearestEven;
8f506c70 2738 int8_t roundIncrement, roundBits;
158142c2
FB
2739 flag isTiny;
2740
a2f2d288 2741 roundingMode = status->float_rounding_mode;
158142c2 2742 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2743 switch (roundingMode) {
2744 case float_round_nearest_even:
f9288a76 2745 case float_round_ties_away:
dc355b76
PM
2746 roundIncrement = 0x40;
2747 break;
2748 case float_round_to_zero:
2749 roundIncrement = 0;
2750 break;
2751 case float_round_up:
2752 roundIncrement = zSign ? 0 : 0x7f;
2753 break;
2754 case float_round_down:
2755 roundIncrement = zSign ? 0x7f : 0;
2756 break;
2757 default:
2758 abort();
2759 break;
158142c2
FB
2760 }
2761 roundBits = zSig & 0x7F;
bb98fe42 2762 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
2763 if ( ( 0xFD < zExp )
2764 || ( ( zExp == 0xFD )
bb98fe42 2765 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2766 ) {
ff32e16e 2767 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 2768 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
2769 }
2770 if ( zExp < 0 ) {
a2f2d288 2771 if (status->flush_to_zero) {
ff32e16e 2772 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2773 return packFloat32(zSign, 0, 0);
2774 }
158142c2 2775 isTiny =
a2f2d288
PM
2776 (status->float_detect_tininess
2777 == float_tininess_before_rounding)
158142c2
FB
2778 || ( zExp < -1 )
2779 || ( zSig + roundIncrement < 0x80000000 );
2780 shift32RightJamming( zSig, - zExp, &zSig );
2781 zExp = 0;
2782 roundBits = zSig & 0x7F;
ff32e16e
PM
2783 if (isTiny && roundBits) {
2784 float_raise(float_flag_underflow, status);
2785 }
158142c2
FB
2786 }
2787 }
a2f2d288
PM
2788 if (roundBits) {
2789 status->float_exception_flags |= float_flag_inexact;
2790 }
158142c2
FB
2791 zSig = ( zSig + roundIncrement )>>7;
2792 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2793 if ( zSig == 0 ) zExp = 0;
2794 return packFloat32( zSign, zExp, zSig );
2795
2796}
2797
2798/*----------------------------------------------------------------------------
2799| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2800| and significand `zSig', and returns the proper single-precision floating-
2801| point value corresponding to the abstract input. This routine is just like
2802| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
2803| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2804| floating-point exponent.
2805*----------------------------------------------------------------------------*/
2806
2807static float32
0c48262d 2808 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2809 float_status *status)
158142c2 2810{
8f506c70 2811 int8_t shiftCount;
158142c2 2812
0019d5c3 2813 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
2814 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2815 status);
158142c2
FB
2816
2817}
2818
37d18660
PM
2819/*----------------------------------------------------------------------------
2820| If `a' is denormal and we are in flush-to-zero mode then set the
2821| input-denormal exception and return zero. Otherwise just return the value.
2822*----------------------------------------------------------------------------*/
e5a41ffa 2823float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 2824{
a2f2d288 2825 if (status->flush_inputs_to_zero) {
37d18660 2826 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 2827 float_raise(float_flag_input_denormal, status);
37d18660
PM
2828 return make_float64(float64_val(a) & (1ULL << 63));
2829 }
2830 }
2831 return a;
2832}
2833
158142c2
FB
2834/*----------------------------------------------------------------------------
2835| Normalizes the subnormal double-precision floating-point value represented
2836| by the denormalized significand `aSig'. The normalized exponent and
2837| significand are stored at the locations pointed to by `zExpPtr' and
2838| `zSigPtr', respectively.
2839*----------------------------------------------------------------------------*/
2840
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift that moves the leading set bit of aSig up to bit 52. */
    const int8_t lshift = clz64(aSig) - 11;

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
2851
2852/*----------------------------------------------------------------------------
2853| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2854| double-precision floating-point value, returning the result. After being
2855| shifted into the proper positions, the three fields are simply added
2856| together to form the result. This means that any integer portion of `zSig'
2857| will be added into the exponent. Since a properly normalized significand
2858| will have an integer portion equal to 1, the `zExp' input should be 1 less
2859| than the desired result exponent whenever `zSig' is a complete, normalized
2860| significand.
2861*----------------------------------------------------------------------------*/
2862
0c48262d 2863static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
2864{
2865
f090c9d4 2866 return make_float64(
bb98fe42 2867 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
2868
2869}
2870
2871/*----------------------------------------------------------------------------
2872| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2873| and significand `zSig', and returns the proper double-precision floating-
2874| point value corresponding to the abstract input. Ordinarily, the abstract
2875| value is simply rounded and packed into the double-precision format, with
2876| the inexact exception raised if the abstract input cannot be represented
2877| exactly. However, if the abstract value is too large, the overflow and
2878| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
2879| returned. If the abstract value is too small, the input value is rounded to
2880| a subnormal number, and the underflow and inexact exceptions are raised if
2881| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
2882| precision floating-point number.
2883| The input significand `zSig' has its binary point between bits 62
2884| and 61, which is 10 bits to the left of the usual location. This shifted
2885| significand must be normalized or smaller. If `zSig' is not normalized,
2886| `zExp' must be 0; in that case, the result returned is a subnormal number,
2887| and it must not require rounding. In the usual case that `zSig' is
2888| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2889| The handling of underflow and overflow follows the IEC/IEEE Standard for
2890| Binary Floating-Point Arithmetic.
2891*----------------------------------------------------------------------------*/
2892
0c48262d 2893static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2894 float_status *status)
158142c2 2895{
8f506c70 2896 int8_t roundingMode;
158142c2 2897 flag roundNearestEven;
0c48262d 2898 int roundIncrement, roundBits;
158142c2
FB
2899 flag isTiny;
2900
a2f2d288 2901 roundingMode = status->float_rounding_mode;
158142c2 2902 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2903 switch (roundingMode) {
2904 case float_round_nearest_even:
f9288a76 2905 case float_round_ties_away:
dc355b76
PM
2906 roundIncrement = 0x200;
2907 break;
2908 case float_round_to_zero:
2909 roundIncrement = 0;
2910 break;
2911 case float_round_up:
2912 roundIncrement = zSign ? 0 : 0x3ff;
2913 break;
2914 case float_round_down:
2915 roundIncrement = zSign ? 0x3ff : 0;
2916 break;
9ee6f678
BR
2917 case float_round_to_odd:
2918 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2919 break;
dc355b76
PM
2920 default:
2921 abort();
158142c2
FB
2922 }
2923 roundBits = zSig & 0x3FF;
bb98fe42 2924 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
2925 if ( ( 0x7FD < zExp )
2926 || ( ( zExp == 0x7FD )
bb98fe42 2927 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2928 ) {
9ee6f678
BR
2929 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2930 roundIncrement != 0;
ff32e16e 2931 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 2932 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
2933 }
2934 if ( zExp < 0 ) {
a2f2d288 2935 if (status->flush_to_zero) {
ff32e16e 2936 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2937 return packFloat64(zSign, 0, 0);
2938 }
158142c2 2939 isTiny =
a2f2d288
PM
2940 (status->float_detect_tininess
2941 == float_tininess_before_rounding)
158142c2
FB
2942 || ( zExp < -1 )
2943 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2944 shift64RightJamming( zSig, - zExp, &zSig );
2945 zExp = 0;
2946 roundBits = zSig & 0x3FF;
ff32e16e
PM
2947 if (isTiny && roundBits) {
2948 float_raise(float_flag_underflow, status);
2949 }
9ee6f678
BR
2950 if (roundingMode == float_round_to_odd) {
2951 /*
2952 * For round-to-odd case, the roundIncrement depends on
2953 * zSig which just changed.
2954 */
2955 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2956 }
158142c2
FB
2957 }
2958 }
a2f2d288
PM
2959 if (roundBits) {
2960 status->float_exception_flags |= float_flag_inexact;
2961 }
158142c2
FB
2962 zSig = ( zSig + roundIncrement )>>10;
2963 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2964 if ( zSig == 0 ) zExp = 0;
2965 return packFloat64( zSign, zExp, zSig );
2966
2967}
2968
2969/*----------------------------------------------------------------------------
2970| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2971| and significand `zSig', and returns the proper double-precision floating-
2972| point value corresponding to the abstract input. This routine is just like
2973| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
2974| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2975| floating-point exponent.
2976*----------------------------------------------------------------------------*/
2977
2978static float64
0c48262d 2979 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2980 float_status *status)
158142c2 2981{
8f506c70 2982 int8_t shiftCount;
158142c2 2983
0019d5c3 2984 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
2985 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2986 status);
158142c2
FB
2987
2988}
2989
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes a subnormal extended double-precision significand.  `aSig' is
| shifted left by clz64(aSig) bits; the shifted significand is stored through
| `zSigPtr' and the matching exponent, 1 minus the shift distance, is stored
| through `zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t distance = clz64(aSig);

    *zSigPtr = aSig << distance;
    *zExpPtr = 1 - distance;
}
3006
3007/*----------------------------------------------------------------------------
3008| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3009| and extended significand formed by the concatenation of `zSig0' and `zSig1',
3010| and returns the proper extended double-precision floating-point value
3011| corresponding to the abstract input. Ordinarily, the abstract value is
3012| rounded and packed into the extended double-precision format, with the
3013| inexact exception raised if the abstract input cannot be represented
3014| exactly. However, if the abstract value is too large, the overflow and
3015| inexact exceptions are raised and an infinity or maximal finite value is
3016| returned. If the abstract value is too small, the input value is rounded to
3017| a subnormal number, and the underflow and inexact exceptions are raised if
3018| the abstract input cannot be represented exactly as a subnormal extended
3019| double-precision floating-point number.
3020| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3021| number of bits as single or double precision, respectively. Otherwise, the
3022| result is rounded to the full precision of the extended double-precision
3023| format.
3024| The input significand must be normalized or smaller. If the input
3025| significand is not normalized, `zExp' must be 0; in that case, the result
3026| returned is a subnormal number, and it must not require rounding. The
3027| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3028| Floating-Point Arithmetic.
3029*----------------------------------------------------------------------------*/
3030
88857aca
LV
3031floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3032 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3033 float_status *status)
158142c2 3034{
8f506c70 3035 int8_t roundingMode;
158142c2 3036 flag roundNearestEven, increment, isTiny;
f42c2224 3037 int64_t roundIncrement, roundMask, roundBits;
158142c2 3038
a2f2d288 3039 roundingMode = status->float_rounding_mode;
158142c2
FB
3040 roundNearestEven = ( roundingMode == float_round_nearest_even );
3041 if ( roundingPrecision == 80 ) goto precision80;
3042 if ( roundingPrecision == 64 ) {
3043 roundIncrement = LIT64( 0x0000000000000400 );
3044 roundMask = LIT64( 0x00000000000007FF );
3045 }
3046 else if ( roundingPrecision == 32 ) {
3047 roundIncrement = LIT64( 0x0000008000000000 );
3048 roundMask = LIT64( 0x000000FFFFFFFFFF );
3049 }
3050 else {
3051 goto precision80;
3052 }
3053 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
3054 switch (roundingMode) {
3055 case float_round_nearest_even:
f9288a76 3056 case float_round_ties_away:
dc355b76
PM
3057 break;
3058 case float_round_to_zero:
3059 roundIncrement = 0;
3060 break;
3061 case float_round_up:
3062 roundIncrement = zSign ? 0 : roundMask;
3063 break;
3064 case float_round_down:
3065 roundIncrement = zSign ? roundMask : 0;
3066 break;
3067 default:
3068 abort();
158142c2
FB
3069 }
3070 roundBits = zSig0 & roundMask;
bb98fe42 3071 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3072 if ( ( 0x7FFE < zExp )
3073 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3074 ) {
3075 goto overflow;
3076 }
3077 if ( zExp <= 0 ) {
a2f2d288 3078 if (status->flush_to_zero) {
ff32e16e 3079 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3080 return packFloatx80(zSign, 0, 0);
3081 }
158142c2 3082 isTiny =
a2f2d288
PM
3083 (status->float_detect_tininess
3084 == float_tininess_before_rounding)
158142c2
FB
3085 || ( zExp < 0 )
3086 || ( zSig0 <= zSig0 + roundIncrement );
3087 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3088 zExp = 0;
3089 roundBits = zSig0 & roundMask;
ff32e16e
PM
3090 if (isTiny && roundBits) {
3091 float_raise(float_flag_underflow, status);
3092 }
a2f2d288
PM
3093 if (roundBits) {
3094 status->float_exception_flags |= float_flag_inexact;
3095 }
158142c2 3096 zSig0 += roundIncrement;
bb98fe42 3097 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3098 roundIncrement = roundMask + 1;
3099 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3100 roundMask |= roundIncrement;
3101 }
3102 zSig0 &= ~ roundMask;
3103 return packFloatx80( zSign, zExp, zSig0 );
3104 }
3105 }
a2f2d288
PM
3106 if (roundBits) {
3107 status->float_exception_flags |= float_flag_inexact;
3108 }
158142c2
FB
3109 zSig0 += roundIncrement;
3110 if ( zSig0 < roundIncrement ) {
3111 ++zExp;
3112 zSig0 = LIT64( 0x8000000000000000 );
3113 }
3114 roundIncrement = roundMask + 1;
3115 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3116 roundMask |= roundIncrement;
3117 }
3118 zSig0 &= ~ roundMask;
3119 if ( zSig0 == 0 ) zExp = 0;
3120 return packFloatx80( zSign, zExp, zSig0 );
3121 precision80:
dc355b76
PM
3122 switch (roundingMode) {
3123 case float_round_nearest_even:
f9288a76 3124 case float_round_ties_away:
dc355b76
PM
3125 increment = ((int64_t)zSig1 < 0);
3126 break;
3127 case float_round_to_zero:
3128 increment = 0;
3129 break;
3130 case float_round_up:
3131 increment = !zSign && zSig1;
3132 break;
3133 case float_round_down:
3134 increment = zSign && zSig1;
3135 break;
3136 default:
3137 abort();
158142c2 3138 }
bb98fe42 3139 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3140 if ( ( 0x7FFE < zExp )
3141 || ( ( zExp == 0x7FFE )
3142 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
3143 && increment
3144 )
3145 ) {
3146 roundMask = 0;
3147 overflow:
ff32e16e 3148 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3149 if ( ( roundingMode == float_round_to_zero )
3150 || ( zSign && ( roundingMode == float_round_up ) )
3151 || ( ! zSign && ( roundingMode == float_round_down ) )
3152 ) {
3153 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3154 }
0f605c88
LV
3155 return packFloatx80(zSign,
3156 floatx80_infinity_high,
3157 floatx80_infinity_low);
158142c2
FB
3158 }
3159 if ( zExp <= 0 ) {
3160 isTiny =
a2f2d288
PM
3161 (status->float_detect_tininess
3162 == float_tininess_before_rounding)
158142c2
FB
3163 || ( zExp < 0 )
3164 || ! increment
3165 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
3166 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
3167 zExp = 0;
ff32e16e
PM
3168 if (isTiny && zSig1) {
3169 float_raise(float_flag_underflow, status);
3170 }
a2f2d288
PM
3171 if (zSig1) {
3172 status->float_exception_flags |= float_flag_inexact;
3173 }
dc355b76
PM
3174 switch (roundingMode) {
3175 case float_round_nearest_even:
f9288a76 3176 case float_round_ties_away:
dc355b76
PM
3177 increment = ((int64_t)zSig1 < 0);
3178 break;
3179 case float_round_to_zero:
3180 increment = 0;
3181 break;
3182 case float_round_up:
3183 increment = !zSign && zSig1;
3184 break;
3185 case float_round_down:
3186 increment = zSign && zSig1;
3187 break;
3188 default:
3189 abort();
158142c2
FB
3190 }
3191 if ( increment ) {
3192 ++zSig0;
3193 zSig0 &=
bb98fe42
AF
3194 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
3195 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3196 }
3197 return packFloatx80( zSign, zExp, zSig0 );
3198 }
3199 }
a2f2d288
PM
3200 if (zSig1) {
3201 status->float_exception_flags |= float_flag_inexact;
3202 }
158142c2
FB
3203 if ( increment ) {
3204 ++zSig0;
3205 if ( zSig0 == 0 ) {
3206 ++zExp;
3207 zSig0 = LIT64( 0x8000000000000000 );
3208 }
3209 else {
bb98fe42 3210 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3211 }
3212 }
3213 else {
3214 if ( zSig0 == 0 ) zExp = 0;
3215 }
3216 return packFloatx80( zSign, zExp, zSig0 );
3217
3218}
3219
3220/*----------------------------------------------------------------------------
3221| Takes an abstract floating-point value having sign `zSign', exponent
3222| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
3223| and returns the proper extended double-precision floating-point value
3224| corresponding to the abstract input. This routine is just like
3225| `roundAndPackFloatx80' except that the input significand does not have to be
3226| normalized.
3227*----------------------------------------------------------------------------*/
3228
88857aca
LV
3229floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
3230 flag zSign, int32_t zExp,
3231 uint64_t zSig0, uint64_t zSig1,
3232 float_status *status)
158142c2 3233{
8f506c70 3234 int8_t shiftCount;
158142c2
FB
3235
3236 if ( zSig0 == 0 ) {
3237 zSig0 = zSig1;
3238 zSig1 = 0;
3239 zExp -= 64;
3240 }
0019d5c3 3241 shiftCount = clz64(zSig0);
158142c2
FB
3242 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3243 zExp -= shiftCount;
ff32e16e
PM
3244 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
3245 zSig0, zSig1, status);
158142c2
FB
3246
3247}
3248
158142c2
FB
3249/*----------------------------------------------------------------------------
3250| Returns the least-significant 64 fraction bits of the quadruple-precision
3251| floating-point value `a'.
3252*----------------------------------------------------------------------------*/
3253
a49db98d 3254static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
3255{
3256
3257 return a.low;
3258
3259}
3260
3261/*----------------------------------------------------------------------------
3262| Returns the most-significant 48 fraction bits of the quadruple-precision
3263| floating-point value `a'.
3264*----------------------------------------------------------------------------*/
3265
a49db98d 3266static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
3267{
3268
3269 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
3270
3271}
3272
3273/*----------------------------------------------------------------------------
3274| Returns the exponent bits of the quadruple-precision floating-point value
3275| `a'.
3276*----------------------------------------------------------------------------*/
3277
f4014512 3278static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
3279{
3280
3281 return ( a.high>>48 ) & 0x7FFF;
3282
3283}
3284
3285/*----------------------------------------------------------------------------
3286| Returns the sign bit of the quadruple-precision floating-point value `a'.
3287*----------------------------------------------------------------------------*/
3288
a49db98d 3289static inline flag extractFloat128Sign( float128 a )
158142c2
FB
3290{
3291
3292 return a.high>>63;
3293
3294}
3295
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand formed by the
| concatenation of `aSig0' (high) and `aSig1' (low).  The normalized exponent
| is stored through `zExpPtr'; the high word of the normalized significand
| (its most significant 49 bits) is stored through `zSig0Ptr' and the low
| 64 bits through `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t distance;

    if (aSig0 != 0) {
        /* Leading bit is in the high word: an ordinary short left shift. */
        distance = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, distance, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - distance;
        return;
    }

    /*
     * High word empty: the leading bit comes from aSig1.  `distance' is
     * measured as if aSig1 already sat in the high word, so it is negative
     * when aSig1 has to be split across both output words.
     */
    distance = clz64(aSig1) - 15;
    if (distance >= 0) {
        *zSig0Ptr = aSig1 << distance;
        *zSig1Ptr = 0;
    } else {
        *zSig0Ptr = aSig1 >> (-distance);
        *zSig1Ptr = aSig1 << (distance & 63);
    }
    *zExpPtr = -distance - 63;
}
3336
3337/*----------------------------------------------------------------------------
3338| Packs the sign `zSign', the exponent `zExp', and the significand formed
3339| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
3340| floating-point value, returning the result. After being shifted into the
3341| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
3342| added together to form the most significant 32 bits of the result. This
3343| means that any integer portion of `zSig0' will be added into the exponent.
3344| Since a properly normalized significand will have an integer portion equal
3345| to 1, the `zExp' input should be 1 less than the desired result exponent
3346| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
3347| significand.
3348*----------------------------------------------------------------------------*/
3349
a49db98d 3350static inline float128
f4014512 3351 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
3352{
3353 float128 z;
3354
3355 z.low = zSig1;
bb98fe42 3356 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
3357 return z;
3358
3359}
3360
3361/*----------------------------------------------------------------------------
3362| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3363| and extended significand formed by the concatenation of `zSig0', `zSig1',
3364| and `zSig2', and returns the proper quadruple-precision floating-point value
3365| corresponding to the abstract input. Ordinarily, the abstract value is
3366| simply rounded and packed into the quadruple-precision format, with the
3367| inexact exception raised if the abstract input cannot be represented
3368| exactly. However, if the abstract value is too large, the overflow and
3369| inexact exceptions are raised and an infinity or maximal finite value is
3370| returned. If the abstract value is too small, the input value is rounded to
3371| a subnormal number, and the underflow and inexact exceptions are raised if
3372| the abstract input cannot be represented exactly as a subnormal quadruple-
3373| precision floating-point number.
3374| The input significand must be normalized or smaller. If the input
3375| significand is not normalized, `zExp' must be 0; in that case, the result
3376| returned is a subnormal number, and it must not require rounding. In the
3377| usual case that the input significand is normalized, `zExp' must be 1 less
3378| than the ``true'' floating-point exponent. The handling of underflow and
3379| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3380*----------------------------------------------------------------------------*/
3381
f4014512 3382static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3383 uint64_t zSig0, uint64_t zSig1,
3384 uint64_t zSig2, float_status *status)
158142c2 3385{
8f506c70 3386 int8_t roundingMode;
158142c2
FB
3387 flag roundNearestEven, increment, isTiny;
3388
a2f2d288 3389 roundingMode = status->float_rounding_mode;
158142c2 3390 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3391 switch (roundingMode) {
3392 case float_round_nearest_even:
f9288a76 3393 case float_round_ties_away:
dc355b76
PM
3394 increment = ((int64_t)zSig2 < 0);
3395 break;
3396 case float_round_to_zero:
3397 increment = 0;
3398 break;
3399 case float_round_up:
3400 increment = !zSign && zSig2;
3401 break;
3402 case float_round_down:
3403 increment = zSign && zSig2;
3404 break;
9ee6f678
BR
3405 case float_round_to_odd:
3406 increment = !(zSig1 & 0x1) && zSig2;
3407 break;
dc355b76
PM
3408 default:
3409 abort();
158142c2 3410 }
bb98fe42 3411 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
3412 if ( ( 0x7FFD < zExp )
3413 || ( ( zExp == 0x7FFD )
3414 && eq128(
3415 LIT64( 0x0001FFFFFFFFFFFF ),
3416 LIT64( 0xFFFFFFFFFFFFFFFF ),
3417 zSig0,
3418 zSig1
3419 )
3420 && increment
3421 )
3422 ) {
ff32e16e 3423 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3424 if ( ( roundingMode == float_round_to_zero )
3425 || ( zSign && ( roundingMode == float_round_up ) )
3426 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 3427 || (roundingMode == float_round_to_odd)
158142c2
FB
3428 ) {
3429 return
3430 packFloat128(
3431 zSign,
3432 0x7FFE,
3433 LIT64( 0x0000FFFFFFFFFFFF ),
3434 LIT64( 0xFFFFFFFFFFFFFFFF )
3435 );
3436 }
3437 return packFloat128( zSign, 0x7FFF, 0, 0 );
3438 }
3439 if ( zExp < 0 ) {
a2f2d288 3440 if (status->flush_to_zero) {
ff32e16e 3441 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3442 return packFloat128(zSign, 0, 0, 0);
3443 }
158142c2 3444 isTiny =
a2f2d288
PM
3445 (status->float_detect_tininess
3446 == float_tininess_before_rounding)
158142c2
FB
3447 || ( zExp < -1 )
3448 || ! increment
3449 || lt128(
3450 zSig0,
3451 zSig1,
3452 LIT64( 0x0001FFFFFFFFFFFF ),
3453 LIT64( 0xFFFFFFFFFFFFFFFF )
3454 );
3455 shift128ExtraRightJamming(
3456 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
3457 zExp = 0;
ff32e16e
PM
3458 if (isTiny && zSig2) {
3459 float_raise(float_flag_underflow, status);
3460 }
dc355b76
PM
3461 switch (roundingMode) {
3462 case float_round_nearest_even:
f9288a76 3463 case float_round_ties_away:
dc355b76
PM
3464 increment = ((int64_t)zSig2 < 0);
3465 break;
3466 case float_round_to_zero:
3467 increment = 0;
3468 break;
3469 case float_round_up:
3470 increment = !zSign && zSig2;
3471 break;
3472 case float_round_down:
3473 increment = zSign && zSig2;
3474 break;
9ee6f678
BR
3475 case float_round_to_odd:
3476 increment = !(zSig1 & 0x1) && zSig2;
3477 break;
dc355b76
PM
3478 default:
3479 abort();
158142c2
FB
3480 }
3481 }
3482 }
a2f2d288
PM
3483 if (zSig2) {
3484 status->float_exception_flags |= float_flag_inexact;
3485 }
158142c2
FB
3486 if ( increment ) {
3487 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
3488 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
3489 }
3490 else {
3491 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
3492 }
3493 return packFloat128( zSign, zExp, zSig0, zSig1 );
3494
3495}
3496
3497/*----------------------------------------------------------------------------
3498| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3499| and significand formed by the concatenation of `zSig0' and `zSig1', and
3500| returns the proper quadruple-precision floating-point value corresponding
3501| to the abstract input. This routine is just like `roundAndPackFloat128'
3502| except that the input significand has fewer bits and does not have to be
3503| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
3504| point exponent.
3505*----------------------------------------------------------------------------*/
3506
f4014512 3507static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3508 uint64_t zSig0, uint64_t zSig1,
3509 float_status *status)
158142c2 3510{
8f506c70 3511 int8_t shiftCount;
bb98fe42 3512 uint64_t zSig2;
158142c2
FB
3513
3514 if ( zSig0 == 0 ) {
3515 zSig0 = zSig1;
3516 zSig1 = 0;
3517 zExp -= 64;
3518 }
0019d5c3 3519 shiftCount = clz64(zSig0) - 15;
158142c2
FB
3520 if ( 0 <= shiftCount ) {
3521 zSig2 = 0;
3522 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3523 }
3524 else {
3525 shift128ExtraRightJamming(
3526 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3527 }
3528 zExp -= shiftCount;
ff32e16e 3529 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
3530
3531}
3532
158142c2 3533
158142c2
FB
3534/*----------------------------------------------------------------------------
3535| Returns the result of converting the 32-bit two's complement integer `a'
3536| to the extended double-precision floating-point format. The conversion
3537| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3538| Arithmetic.
3539*----------------------------------------------------------------------------*/
3540
e5a41ffa 3541floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
3542{
3543 flag zSign;
3a87d009 3544 uint32_t absA;
8f506c70 3545 int8_t shiftCount;
bb98fe42 3546 uint64_t zSig;
158142c2
FB
3547
3548 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3549 zSign = ( a < 0 );
3550 absA = zSign ? - a : a;
0019d5c3 3551 shiftCount = clz32(absA) + 32;
158142c2
FB
3552 zSig = absA;
3553 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3554
3555}
3556
158142c2
FB
3557/*----------------------------------------------------------------------------
3558| Returns the result of converting the 32-bit two's complement integer `a' to
3559| the quadruple-precision floating-point format. The conversion is performed
3560| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3561*----------------------------------------------------------------------------*/
3562
e5a41ffa 3563float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
3564{
3565 flag zSign;
3a87d009 3566 uint32_t absA;
8f506c70 3567 int8_t shiftCount;
bb98fe42 3568 uint64_t zSig0;
158142c2
FB
3569
3570 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3571 zSign = ( a < 0 );
3572 absA = zSign ? - a : a;
0019d5c3 3573 shiftCount = clz32(absA) + 17;
158142c2
FB
3574 zSig0 = absA;
3575 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3576
3577}
3578
158142c2
FB
3579/*----------------------------------------------------------------------------
3580| Returns the result of converting the 64-bit two's complement integer `a'
3581| to the extended double-precision floating-point format. The conversion
3582| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3583| Arithmetic.
3584*----------------------------------------------------------------------------*/
3585
e5a41ffa 3586floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
3587{
3588 flag zSign;
182f42fd 3589 uint64_t absA;
8f506c70 3590 int8_t shiftCount;
158142c2
FB
3591
3592 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3593 zSign = ( a < 0 );
3594 absA = zSign ? - a : a;
0019d5c3 3595 shiftCount = clz64(absA);
158142c2
FB
3596 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3597
3598}
3599
158142c2
FB
3600/*----------------------------------------------------------------------------
3601| Returns the result of converting the 64-bit two's complement integer `a' to
3602| the quadruple-precision floating-point format. The conversion is performed
3603| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3604*----------------------------------------------------------------------------*/
3605
e5a41ffa 3606float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
3607{
3608 flag zSign;
182f42fd 3609 uint64_t absA;
8f506c70 3610 int8_t shiftCount;
f4014512 3611 int32_t zExp;
bb98fe42 3612 uint64_t zSig0, zSig1;
158142c2
FB
3613
3614 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3615 zSign = ( a < 0 );
3616 absA = zSign ? - a : a;
0019d5c3 3617 shiftCount = clz64(absA) + 49;
158142c2
FB
3618 zExp = 0x406E - shiftCount;
3619 if ( 64 <= shiftCount ) {
3620 zSig1 = 0;
3621 zSig0 = absA;
3622 shiftCount -= 64;
3623 }
3624 else {
3625 zSig1 = absA;
3626 zSig0 = 0;
3627 }
3628 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3629 return packFloat128( zSign, zExp, zSig0, zSig1 );
3630
3631}
3632
6bb8e0f1
PM
3633/*----------------------------------------------------------------------------
3634| Returns the result of converting the 64-bit unsigned integer `a'
3635| to the quadruple-precision floating-point format. The conversion is performed
3636| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3637*----------------------------------------------------------------------------*/
3638
e5a41ffa 3639float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
3640{
3641 if (a == 0) {
3642 return float128_zero;
3643 }
6603d506 3644 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
3645}
3646
158142c2
FB
3647/*----------------------------------------------------------------------------
3648| Returns the result of converting the single-precision floating-point value
3649| `a' to the extended double-precision floating-point format. The conversion
3650| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3651| Arithmetic.
3652*----------------------------------------------------------------------------*/
3653
e5a41ffa 3654floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
3655{
3656 flag aSign;
0c48262d 3657 int aExp;
bb98fe42 3658 uint32_t aSig;
158142c2 3659
ff32e16e 3660 a = float32_squash_input_denormal(a, status);
158142c2
FB
3661 aSig = extractFloat32Frac( a );
3662 aExp = extractFloat32Exp( a );
3663 aSign = extractFloat32Sign( a );
3664 if ( aExp == 0xFF ) {
ff32e16e
PM
3665 if (aSig) {
3666 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3667 }
0f605c88
LV
3668 return packFloatx80(aSign,
3669 floatx80_infinity_high,
3670 floatx80_infinity_low);
158142c2
FB
3671 }
3672 if ( aExp == 0 ) {
3673 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3674 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3675 }
3676 aSig |= 0x00800000;
bb98fe42 3677 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
3678
3679}
3680
158142c2
FB
3681/*----------------------------------------------------------------------------
3682| Returns the result of converting the single-precision floating-point value
3683| `a' to the double-precision floating-point format. The conversion is
3684| performed according to the IEC/IEEE Standard for Binary Floating-Point
3685| Arithmetic.
3686*----------------------------------------------------------------------------*/
3687
e5a41ffa 3688float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
3689{
3690 flag aSign;
0c48262d 3691 int aExp;
bb98fe42 3692 uint32_t aSig;
158142c2 3693
ff32e16e 3694 a = float32_squash_input_denormal(a, status);
158142c2
FB
3695 aSig = extractFloat32Frac( a );
3696 aExp = extractFloat32Exp( a );
3697 aSign = extractFloat32Sign( a );
3698 if ( aExp == 0xFF ) {
ff32e16e
PM
3699 if (aSig) {
3700 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3701 }
158142c2
FB
3702 return packFloat128( aSign, 0x7FFF, 0, 0 );
3703 }
3704 if ( aExp == 0 ) {
3705 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3706 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3707 --aExp;
3708 }
bb98fe42 3709 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
3710
3711}
3712
158142c2
FB
3713/*----------------------------------------------------------------------------
3714| Returns the remainder of the single-precision floating-point value `a'
3715| with respect to the corresponding value `b'. The operation is performed
3716| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3717*----------------------------------------------------------------------------*/
3718
e5a41ffa 3719float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 3720{
ed086f3d 3721 flag aSign, zSign;
0c48262d 3722 int aExp, bExp, expDiff;
bb98fe42
AF
3723 uint32_t aSig, bSig;
3724 uint32_t q;
3725 uint64_t aSig64, bSig64, q64;
3726 uint32_t alternateASig;
3727 int32_t sigMean;
ff32e16e
PM
3728 a = float32_squash_input_denormal(a, status);
3729 b = float32_squash_input_denormal(b, status);
158142c2
FB
3730
3731 aSig = extractFloat32Frac( a );
3732 aExp = extractFloat32Exp( a );
3733 aSign = extractFloat32Sign( a );
3734 bSig = extractFloat32Frac( b );
3735 bExp = extractFloat32Exp( b );
158142c2
FB
3736 if ( aExp == 0xFF ) {
3737 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 3738 return propagateFloat32NaN(a, b, status);
158142c2 3739 }
ff32e16e 3740 float_raise(float_flag_invalid, status);
af39bc8c 3741 return float32_default_nan(status);
158142c2
FB
3742 }
3743 if ( bExp == 0xFF ) {
ff32e16e
PM
3744 if (bSig) {
3745 return propagateFloat32NaN(a, b, status);
3746 }
158142c2
FB
3747 return a;
3748 }
3749 if ( bExp == 0 ) {
3750 if ( bSig == 0 ) {
ff32e16e 3751 float_raise(float_flag_invalid, status);
af39bc8c 3752 return float32_default_nan(status);
158142c2
FB
3753 }
3754 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3755 }
3756 if ( aExp == 0 ) {
3757 if ( aSig == 0 ) return a;
3758 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3759 }
3760 expDiff = aExp - bExp;
3761 aSig |= 0x00800000;
3762 bSig |= 0x00800000;
3763 if ( expDiff < 32 ) {
3764 aSig <<= 8;
3765 bSig <<= 8;
3766 if ( expDiff < 0 ) {
3767 if ( expDiff < -1 ) return a;
3768 aSig >>= 1;
3769 }
3770 q = ( bSig <= aSig );
3771 if ( q ) aSig -= bSig;
3772 if ( 0 < expDiff ) {
bb98fe42 3773 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
3774 q >>= 32 - expDiff;
3775 bSig >>= 2;
3776 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3777 }
3778 else {
3779 aSig >>= 2;
3780 bSig >>= 2;
3781 }
3782 }
3783 else {
3784 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
3785 aSig64 = ( (uint64_t) aSig )<<40;
3786 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
3787 expDiff -= 64;
3788 while ( 0 < expDiff ) {
3789 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3790 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3791 aSig64 = - ( ( bSig * q64 )<<38 );
3792 expDiff -= 62;
3793 }
3794 expDiff += 64;
3795 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3796 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3797 q = q64>>( 64 - expDiff );
3798 bSig <<= 6;
3799 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3800 }
3801 do {
3802 alternateASig = aSig;
3803 ++q;
3804 aSig -= bSig;
bb98fe42 3805 } while ( 0 <= (int32_t) aSig );
158142c2
FB
3806 sigMean = aSig + alternateASig;
3807 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3808 aSig = alternateASig;
3809 }
bb98fe42 3810 zSign = ( (int32_t) aSig < 0 );
158142c2 3811 if ( zSign ) aSig = - aSig;
ff32e16e 3812 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3813}
3814
369be8f6 3815
158142c2 3816
8229c991
AJ
3817/*----------------------------------------------------------------------------
3818| Returns the binary exponential of the single-precision floating-point value
3819| `a'. The operation is performed according to the IEC/IEEE Standard for
3820| Binary Floating-Point Arithmetic.
3821|
3822| Uses the following identities:
3823|
3824| 1. -------------------------------------------------------------------------
3825|      x    x*ln(2)
3826|     2  = e
3827|
3828| 2. -------------------------------------------------------------------------
3829|                      2     3     4     5           n
3830|      x        x     x     x     x     x           x
3831|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
3832|               1!    2!    3!    4!    5!          n!
3833*----------------------------------------------------------------------------*/
3834
3835static const float64 float32_exp2_coefficients[15] =
3836{
d5138cf4
PM
3837 const_float64( 0x3ff0000000000000ll ), /* 1 */
3838 const_float64( 0x3fe0000000000000ll ), /* 2 */
3839 const_float64( 0x3fc5555555555555ll ), /* 3 */
3840 const_float64( 0x3fa5555555555555ll ), /* 4 */
3841 const_float64( 0x3f81111111111111ll ), /* 5 */
3842 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
3843 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
3844 const_float64( 0x3efa01a01a01a01all ), /* 8 */
3845 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
3846 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
3847 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
3848 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
3849 const_float64( 0x3de6124613a86d09ll ), /* 13 */
3850 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
3851 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
3852};
3853
e5a41ffa 3854float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
3855{
3856 flag aSign;
0c48262d 3857 int aExp;
bb98fe42 3858 uint32_t aSig;
8229c991
AJ
3859 float64 r, x, xn;
3860 int i;
ff32e16e 3861 a = float32_squash_input_denormal(a, status);
8229c991
AJ
3862
3863 aSig = extractFloat32Frac( a );
3864 aExp = extractFloat32Exp( a );
3865 aSign = extractFloat32Sign( a );
3866
3867 if ( aExp == 0xFF) {
ff32e16e
PM
3868 if (aSig) {
3869 return propagateFloat32NaN(a, float32_zero, status);
3870 }
8229c991
AJ
3871 return (aSign) ? float32_zero : a;
3872 }
3873 if (aExp == 0) {
3874 if (aSig == 0) return float32_one;
3875 }
3876
ff32e16e 3877 float_raise(float_flag_inexact, status);
8229c991
AJ
3878
3879 /* ******************************* */
3880 /* using float64 for approximation */
3881 /* ******************************* */
ff32e16e
PM
3882 x = float32_to_float64(a, status);
3883 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
3884
3885 xn = x;
3886 r = float64_one;
3887 for (i = 0 ; i < 15 ; i++) {
3888 float64 f;
3889
ff32e16e
PM
3890 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3891 r = float64_add(r, f, status);
8229c991 3892
ff32e16e 3893 xn = float64_mul(xn, x, status);
8229c991
AJ
3894 }
3895
3896 return float64_to_float32(r, status);
3897}
3898
374dfc33
AJ
3899/*----------------------------------------------------------------------------
3900| Returns the binary log of the single-precision floating-point value `a'.
3901| The operation is performed according to the IEC/IEEE Standard for Binary
3902| Floating-Point Arithmetic.
3903*----------------------------------------------------------------------------*/
e5a41ffa 3904float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
3905{
3906 flag aSign, zSign;
0c48262d 3907 int aExp;
bb98fe42 3908 uint32_t aSig, zSig, i;
374dfc33 3909
ff32e16e 3910 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
3911 aSig = extractFloat32Frac( a );
3912 aExp = extractFloat32Exp( a );
3913 aSign = extractFloat32Sign( a );
3914
3915 if ( aExp == 0 ) {
3916 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3917 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3918 }
3919 if ( aSign ) {
ff32e16e 3920 float_raise(float_flag_invalid, status);
af39bc8c 3921 return float32_default_nan(status);
374dfc33
AJ
3922 }
3923 if ( aExp == 0xFF ) {
ff32e16e
PM
3924 if (aSig) {
3925 return propagateFloat32NaN(a, float32_zero, status);
3926 }
374dfc33
AJ
3927 return a;
3928 }
3929
3930 aExp -= 0x7F;
3931 aSig |= 0x00800000;
3932 zSign = aExp < 0;
3933 zSig = aExp << 23;
3934
3935 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 3936 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
3937 if ( aSig & 0x01000000 ) {
3938 aSig >>= 1;
3939 zSig |= i;
3940 }
3941 }
3942
3943 if ( zSign )
3944 zSig = -zSig;
3945
ff32e16e 3946 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
3947}
3948
158142c2
FB
3949/*----------------------------------------------------------------------------
3950| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
3951| the corresponding value `b', and 0 otherwise. The invalid exception is
3952| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3953| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3954*----------------------------------------------------------------------------*/
3955
e5a41ffa 3956int float32_eq(float32 a, float32 b, float_status *status)
158142c2 3957{
b689362d 3958 uint32_t av, bv;
ff32e16e
PM
3959 a = float32_squash_input_denormal(a, status);
3960 b = float32_squash_input_denormal(b, status);
158142c2
FB
3961
3962 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3963 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3964 ) {
ff32e16e 3965 float_raise(float_flag_invalid, status);
158142c2
FB
3966 return 0;
3967 }
b689362d
AJ
3968 av = float32_val(a);
3969 bv = float32_val(b);
3970 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3971}
3972
3973/*----------------------------------------------------------------------------
3974| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3975| or equal to the corresponding value `b', and 0 otherwise. The invalid
3976| exception is raised if either operand is a NaN. The comparison is performed
3977| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3978*----------------------------------------------------------------------------*/
3979
e5a41ffa 3980int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
3981{
3982 flag aSign, bSign;
bb98fe42 3983 uint32_t av, bv;
ff32e16e
PM
3984 a = float32_squash_input_denormal(a, status);
3985 b = float32_squash_input_denormal(b, status);
158142c2
FB
3986
3987 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3988 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3989 ) {
ff32e16e 3990 float_raise(float_flag_invalid, status);
158142c2
FB
3991 return 0;
3992 }
3993 aSign = extractFloat32Sign( a );
3994 bSign = extractFloat32Sign( b );
f090c9d4
PB
3995 av = float32_val(a);
3996 bv = float32_val(b);
bb98fe42 3997 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3998 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3999
4000}
4001
4002/*----------------------------------------------------------------------------
4003| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4004| the corresponding value `b', and 0 otherwise. The invalid exception is
4005| raised if either operand is a NaN. The comparison is performed according
4006| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4007*----------------------------------------------------------------------------*/
4008
e5a41ffa 4009int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
4010{
4011 flag aSign, bSign;
bb98fe42 4012 uint32_t av, bv;
ff32e16e
PM
4013 a = float32_squash_input_denormal(a, status);
4014 b = float32_squash_input_denormal(b, status);
158142c2
FB
4015
4016 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4017 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4018 ) {
ff32e16e 4019 float_raise(float_flag_invalid, status);
158142c2
FB
4020 return 0;
4021 }
4022 aSign = extractFloat32Sign( a );
4023 bSign = extractFloat32Sign( b );
f090c9d4
PB
4024 av = float32_val(a);
4025 bv = float32_val(b);
bb98fe42 4026 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4027 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4028
4029}
4030
67b7861d
AJ
4031/*----------------------------------------------------------------------------
4032| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4033| be compared, and 0 otherwise. The invalid exception is raised if either
4034| operand is a NaN. The comparison is performed according to the IEC/IEEE
4035| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4036*----------------------------------------------------------------------------*/
4037
e5a41ffa 4038int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4039{
ff32e16e
PM
4040 a = float32_squash_input_denormal(a, status);
4041 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4042
4043 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4044 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4045 ) {
ff32e16e 4046 float_raise(float_flag_invalid, status);
67b7861d
AJ
4047 return 1;
4048 }
4049 return 0;
4050}
b689362d 4051
158142c2
FB
4052/*----------------------------------------------------------------------------
4053| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4054| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4055| exception. The comparison is performed according to the IEC/IEEE Standard
4056| for Binary Floating-Point Arithmetic.
158142c2
FB
4057*----------------------------------------------------------------------------*/
4058
e5a41ffa 4059int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4060{
ff32e16e
PM
4061 a = float32_squash_input_denormal(a, status);
4062 b = float32_squash_input_denormal(b, status);
158142c2
FB
4063
4064 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4065 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4066 ) {
af39bc8c
AM
4067 if (float32_is_signaling_nan(a, status)
4068 || float32_is_signaling_nan(b, status)) {
ff32e16e 4069 float_raise(float_flag_invalid, status);
b689362d 4070 }
158142c2
FB
4071 return 0;
4072 }
b689362d
AJ
4073 return ( float32_val(a) == float32_val(b) ) ||
4074 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4075}
4076
4077/*----------------------------------------------------------------------------
4078| Returns 1 if the single-precision floating-point value `a' is less than or
4079| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4080| cause an exception. Otherwise, the comparison is performed according to the
4081| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4082*----------------------------------------------------------------------------*/
4083
e5a41ffa 4084int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
4085{
4086 flag aSign, bSign;
bb98fe42 4087 uint32_t av, bv;
ff32e16e
PM
4088 a = float32_squash_input_denormal(a, status);
4089 b = float32_squash_input_denormal(b, status);
158142c2
FB
4090
4091 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4092 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4093 ) {
af39bc8c
AM
4094 if (float32_is_signaling_nan(a, status)
4095 || float32_is_signaling_nan(b, status)) {
ff32e16e 4096 float_raise(float_flag_invalid, status);
158142c2
FB
4097 }
4098 return 0;
4099 }
4100 aSign = extractFloat32Sign( a );
4101 bSign = extractFloat32Sign( b );
f090c9d4
PB
4102 av = float32_val(a);
4103 bv = float32_val(b);
bb98fe42 4104 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4105 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4106
4107}
4108
4109/*----------------------------------------------------------------------------
4110| Returns 1 if the single-precision floating-point value `a' is less than
4111| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4112| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4113| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4114*----------------------------------------------------------------------------*/
4115
ab52f973 4116int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4117{
ab52f973
AB
4118 flag aSign, bSign;
4119 uint32_t av, bv;
4120 a = float32_squash_input_denormal(a, status);
4121 b = float32_squash_input_denormal(b, status);
158142c2 4122
ab52f973
AB
4123 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4124 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4125 ) {
4126 if (float32_is_signaling_nan(a, status)
4127 || float32_is_signaling_nan(b, status)) {
ff32e16e 4128 float_raise(float_flag_invalid, status);
158142c2 4129 }
ab52f973 4130 return 0;
158142c2 4131 }
ab52f973
AB
4132 aSign = extractFloat32Sign( a );
4133 bSign = extractFloat32Sign( b );
4134 av = float32_val(a);
4135 bv = float32_val(b);
4136 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4137 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4138
4139}
4140
4141/*----------------------------------------------------------------------------
ab52f973
AB
4142| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4143| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4144| comparison is performed according to the IEC/IEEE Standard for Binary
4145| Floating-Point Arithmetic.
158142c2
FB
4146*----------------------------------------------------------------------------*/
4147
ab52f973 4148int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4149{
ab52f973
AB
4150 a = float32_squash_input_denormal(a, status);
4151 b = float32_squash_input_denormal(b, status);
158142c2 4152
ab52f973
AB
4153 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4154 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4155 ) {
4156 if (float32_is_signaling_nan(a, status)
4157 || float32_is_signaling_nan(b, status)) {
4158 float_raise(float_flag_invalid, status);
158142c2 4159 }
ab52f973 4160 return 1;
158142c2 4161 }
ab52f973 4162 return 0;
158142c2
FB
4163}
4164
210cbd49
AB
4165/*----------------------------------------------------------------------------
4166| If `a' is denormal and we are in flush-to-zero mode then set the
4167| input-denormal exception and return zero. Otherwise just return the value.
4168*----------------------------------------------------------------------------*/
4169float16 float16_squash_input_denormal(float16 a, float_status *status)
4170{
4171 if (status->flush_inputs_to_zero) {
4172 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
4173 float_raise(float_flag_input_denormal, status);
4174 return make_float16(float16_val(a) & 0x8000);
4175 }
4176 }
4177 return a;
4178}
4179
158142c2
FB
4180/*----------------------------------------------------------------------------
4181| Returns the result of converting the double-precision floating-point value
4182| `a' to the extended double-precision floating-point format. The conversion
4183| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4184| Arithmetic.
4185*----------------------------------------------------------------------------*/
4186
e5a41ffa 4187floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
4188{
4189 flag aSign;
0c48262d 4190 int aExp;
bb98fe42 4191 uint64_t aSig;
158142c2 4192
ff32e16e 4193 a = float64_squash_input_denormal(a, status);
158142c2
FB
4194 aSig = extractFloat64Frac( a );
4195 aExp = extractFloat64Exp( a );
4196 aSign = extractFloat64Sign( a );
4197 if ( aExp == 0x7FF ) {
ff32e16e
PM
4198 if (aSig) {
4199 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4200 }
0f605c88
LV
4201 return packFloatx80(aSign,
4202 floatx80_infinity_high,
4203 floatx80_infinity_low);
158142c2
FB
4204 }
4205 if ( aExp == 0 ) {
4206 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4207 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4208 }
4209 return
4210 packFloatx80(
4211 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4212
4213}
4214
158142c2
FB
4215/*----------------------------------------------------------------------------
4216| Returns the result of converting the double-precision floating-point value
4217| `a' to the quadruple-precision floating-point format. The conversion is
4218| performed according to the IEC/IEEE Standard for Binary Floating-Point
4219| Arithmetic.
4220*----------------------------------------------------------------------------*/
4221
e5a41ffa 4222float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
4223{
4224 flag aSign;
0c48262d 4225 int aExp;
bb98fe42 4226 uint64_t aSig, zSig0, zSig1;
158142c2 4227
ff32e16e 4228 a = float64_squash_input_denormal(a, status);
158142c2
FB
4229 aSig = extractFloat64Frac( a );
4230 aExp = extractFloat64Exp( a );
4231 aSign = extractFloat64Sign( a );
4232 if ( aExp == 0x7FF ) {
ff32e16e
PM
4233 if (aSig) {
4234 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
4235 }
158142c2
FB
4236 return packFloat128( aSign, 0x7FFF, 0, 0 );
4237 }
4238 if ( aExp == 0 ) {
4239 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4240 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4241 --aExp;
4242 }
4243 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
4244 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
4245
4246}
4247
158142c2
FB
4248
4249/*----------------------------------------------------------------------------
4250| Returns the remainder of the double-precision floating-point value `a'
4251| with respect to the corresponding value `b'. The operation is performed
4252| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4253*----------------------------------------------------------------------------*/
4254
e5a41ffa 4255float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 4256{
ed086f3d 4257 flag aSign, zSign;
0c48262d 4258 int aExp, bExp, expDiff;
bb98fe42
AF
4259 uint64_t aSig, bSig;
4260 uint64_t q, alternateASig;
4261 int64_t sigMean;
158142c2 4262
ff32e16e
PM
4263 a = float64_squash_input_denormal(a, status);
4264 b = float64_squash_input_denormal(b, status);
158142c2
FB
4265 aSig = extractFloat64Frac( a );
4266 aExp = extractFloat64Exp( a );
4267 aSign = extractFloat64Sign( a );
4268 bSig = extractFloat64Frac( b );
4269 bExp = extractFloat64Exp( b );
158142c2
FB
4270 if ( aExp == 0x7FF ) {
4271 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 4272 return propagateFloat64NaN(a, b, status);
158142c2 4273 }
ff32e16e 4274 float_raise(float_flag_invalid, status);
af39bc8c 4275 return float64_default_nan(status);
158142c2
FB
4276 }
4277 if ( bExp == 0x7FF ) {
ff32e16e
PM
4278 if (bSig) {
4279 return propagateFloat64NaN(a, b, status);
4280 }
158142c2
FB
4281 return a;
4282 }
4283 if ( bExp == 0 ) {
4284 if ( bSig == 0 ) {
ff32e16e 4285 float_raise(float_flag_invalid, status);
af39bc8c 4286 return float64_default_nan(status);
158142c2
FB
4287 }
4288 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4289 }
4290 if ( aExp == 0 ) {
4291 if ( aSig == 0 ) return a;
4292 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4293 }
4294 expDiff = aExp - bExp;
4295 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4296 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4297 if ( expDiff < 0 ) {
4298 if ( expDiff < -1 ) return a;
4299 aSig >>= 1;
4300 }
4301 q = ( bSig <= aSig );
4302 if ( q ) aSig -= bSig;
4303 expDiff -= 64;
4304 while ( 0 < expDiff ) {
4305 q = estimateDiv128To64( aSig, 0, bSig );
4306 q = ( 2 < q ) ? q - 2 : 0;
4307 aSig = - ( ( bSig>>2 ) * q );
4308 expDiff -= 62;
4309 }
4310 expDiff += 64;
4311 if ( 0 < expDiff ) {
4312 q = estimateDiv128To64( aSig, 0, bSig );
4313 q = ( 2 < q ) ? q - 2 : 0;
4314 q >>= 64 - expDiff;
4315 bSig >>= 2;
4316 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4317 }
4318 else {
4319 aSig >>= 2;
4320 bSig >>= 2;
4321 }
4322 do {
4323 alternateASig = aSig;
4324 ++q;
4325 aSig -= bSig;
bb98fe42 4326 } while ( 0 <= (int64_t) aSig );
158142c2
FB
4327 sigMean = aSig + alternateASig;
4328 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4329 aSig = alternateASig;
4330 }
bb98fe42 4331 zSign = ( (int64_t) aSig < 0 );
158142c2 4332 if ( zSign ) aSig = - aSig;
ff32e16e 4333 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4334
4335}
4336
374dfc33
AJ
4337/*----------------------------------------------------------------------------
4338| Returns the binary log of the double-precision floating-point value `a'.
4339| The operation is performed according to the IEC/IEEE Standard for Binary
4340| Floating-Point Arithmetic.
4341*----------------------------------------------------------------------------*/
e5a41ffa 4342float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
4343{
4344 flag aSign, zSign;
0c48262d 4345 int aExp;
bb98fe42 4346 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 4347 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
4348
4349 aSig = extractFloat64Frac( a );
4350 aExp = extractFloat64Exp( a );
4351 aSign = extractFloat64Sign( a );
4352
4353 if ( aExp == 0 ) {
4354 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4355 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4356 }
4357 if ( aSign ) {
ff32e16e 4358 float_raise(float_flag_invalid, status);
af39bc8c 4359 return float64_default_nan(status);
374dfc33
AJ
4360 }
4361 if ( aExp == 0x7FF ) {
ff32e16e
PM
4362 if (aSig) {
4363 return propagateFloat64NaN(a, float64_zero, status);
4364 }
374dfc33
AJ
4365 return a;
4366 }
4367
4368 aExp -= 0x3FF;
4369 aSig |= LIT64( 0x0010000000000000 );
4370 zSign = aExp < 0;
bb98fe42 4371 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
4372 for (i = 1LL << 51; i > 0; i >>= 1) {
4373 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4374 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4375 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4376 aSig >>= 1;
4377 zSig |= i;
4378 }
4379 }
4380
4381 if ( zSign )
4382 zSig = -zSig;
ff32e16e 4383 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
4384}
4385
158142c2
FB
4386/*----------------------------------------------------------------------------
4387| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
4388| corresponding value `b', and 0 otherwise. The invalid exception is raised
4389| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4390| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4391*----------------------------------------------------------------------------*/
4392
e5a41ffa 4393int float64_eq(float64 a, float64 b, float_status *status)
158142c2 4394{
bb98fe42 4395 uint64_t av, bv;
ff32e16e
PM
4396 a = float64_squash_input_denormal(a, status);
4397 b = float64_squash_input_denormal(b, status);
158142c2
FB
4398
4399 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4400 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4401 ) {
ff32e16e 4402 float_raise(float_flag_invalid, status);
158142c2
FB
4403 return 0;
4404 }
f090c9d4 4405 av = float64_val(a);
a1b91bb4 4406 bv = float64_val(b);
bb98fe42 4407 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4408
4409}
4410
4411/*----------------------------------------------------------------------------
4412| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4413| equal to the corresponding value `b', and 0 otherwise. The invalid
4414| exception is raised if either operand is a NaN. The comparison is performed
4415| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4416*----------------------------------------------------------------------------*/
4417
e5a41ffa 4418int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4419{
4420 flag aSign, bSign;
bb98fe42 4421 uint64_t av, bv;
ff32e16e
PM
4422 a = float64_squash_input_denormal(a, status);
4423 b = float64_squash_input_denormal(b, status);
158142c2
FB
4424
4425 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4426 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4427 ) {
ff32e16e 4428 float_raise(float_flag_invalid, status);
158142c2
FB
4429 return 0;
4430 }
4431 aSign = extractFloat64Sign( a );
4432 bSign = extractFloat64Sign( b );
f090c9d4 4433 av = float64_val(a);
a1b91bb4 4434 bv = float64_val(b);
bb98fe42 4435 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4436 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4437
4438}
4439
4440/*----------------------------------------------------------------------------
4441| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4442| the corresponding value `b', and 0 otherwise. The invalid exception is
4443| raised if either operand is a NaN. The comparison is performed according
4444| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4445*----------------------------------------------------------------------------*/
4446
e5a41ffa 4447int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4448{
4449 flag aSign, bSign;
bb98fe42 4450 uint64_t av, bv;
158142c2 4451
ff32e16e
PM
4452 a = float64_squash_input_denormal(a, status);
4453 b = float64_squash_input_denormal(b, status);
158142c2
FB
4454 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4455 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4456 ) {
ff32e16e 4457 float_raise(float_flag_invalid, status);
158142c2
FB
4458 return 0;
4459 }
4460 aSign = extractFloat64Sign( a );
4461 bSign = extractFloat64Sign( b );
f090c9d4 4462 av = float64_val(a);
a1b91bb4 4463 bv = float64_val(b);
bb98fe42 4464 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4465 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4466
4467}
4468
67b7861d
AJ
4469/*----------------------------------------------------------------------------
4470| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4471| be compared, and 0 otherwise. The invalid exception is raised if either
4472| operand is a NaN. The comparison is performed according to the IEC/IEEE
4473| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4474*----------------------------------------------------------------------------*/
4475
e5a41ffa 4476int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4477{
ff32e16e
PM
4478 a = float64_squash_input_denormal(a, status);
4479 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4480
4481 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4482 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4483 ) {
ff32e16e 4484 float_raise(float_flag_invalid, status);
67b7861d
AJ
4485 return 1;
4486 }
4487 return 0;
4488}
4489
158142c2
FB
4490/*----------------------------------------------------------------------------
4491| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4492| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4493| exception. The comparison is performed according to the IEC/IEEE Standard
4494| for Binary Floating-Point Arithmetic.
158142c2
FB
4495*----------------------------------------------------------------------------*/
4496
e5a41ffa 4497int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4498{
bb98fe42 4499 uint64_t av, bv;
ff32e16e
PM
4500 a = float64_squash_input_denormal(a, status);
4501 b = float64_squash_input_denormal(b, status);
158142c2
FB
4502
4503 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4504 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4505 ) {
af39bc8c
AM
4506 if (float64_is_signaling_nan(a, status)
4507 || float64_is_signaling_nan(b, status)) {
ff32e16e 4508 float_raise(float_flag_invalid, status);
b689362d 4509 }
158142c2
FB
4510 return 0;
4511 }
f090c9d4 4512 av = float64_val(a);
a1b91bb4 4513 bv = float64_val(b);
bb98fe42 4514 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4515
4516}
4517
4518/*----------------------------------------------------------------------------
4519| Returns 1 if the double-precision floating-point value `a' is less than or
4520| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4521| cause an exception. Otherwise, the comparison is performed according to the
4522| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4523*----------------------------------------------------------------------------*/
4524
e5a41ffa 4525int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4526{
4527 flag aSign, bSign;
bb98fe42 4528 uint64_t av, bv;
ff32e16e
PM
4529 a = float64_squash_input_denormal(a, status);
4530 b = float64_squash_input_denormal(b, status);
158142c2
FB
4531
4532 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4533 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4534 ) {
af39bc8c
AM
4535 if (float64_is_signaling_nan(a, status)
4536 || float64_is_signaling_nan(b, status)) {
ff32e16e 4537 float_raise(float_flag_invalid, status);
158142c2
FB
4538 }
4539 return 0;
4540 }
4541 aSign = extractFloat64Sign( a );
4542 bSign = extractFloat64Sign( b );
f090c9d4 4543 av = float64_val(a);
a1b91bb4 4544 bv = float64_val(b);
bb98fe42 4545 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4546 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4547
4548}
4549
4550/*----------------------------------------------------------------------------
4551| Returns 1 if the double-precision floating-point value `a' is less than
4552| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4553| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4554| Standard for Binary Floating-Point Arithmetic.
4555*----------------------------------------------------------------------------*/
4556
e5a41ffa 4557int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4558{
4559 flag aSign, bSign;
bb98fe42 4560 uint64_t av, bv;
ff32e16e
PM
4561 a = float64_squash_input_denormal(a, status);
4562 b = float64_squash_input_denormal(b, status);
158142c2
FB
4563
4564 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4565 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4566 ) {
af39bc8c
AM
4567 if (float64_is_signaling_nan(a, status)
4568 || float64_is_signaling_nan(b, status)) {
ff32e16e 4569 float_raise(float_flag_invalid, status);
158142c2
FB
4570 }
4571 return 0;
4572 }
4573 aSign = extractFloat64Sign( a );
4574 bSign = extractFloat64Sign( b );
f090c9d4 4575 av = float64_val(a);
a1b91bb4 4576 bv = float64_val(b);
bb98fe42 4577 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4578 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4579
4580}
4581
67b7861d
AJ
4582/*----------------------------------------------------------------------------
4583| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4584| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4585| comparison is performed according to the IEC/IEEE Standard for Binary
4586| Floating-Point Arithmetic.
4587*----------------------------------------------------------------------------*/
4588
e5a41ffa 4589int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4590{
ff32e16e
PM
4591 a = float64_squash_input_denormal(a, status);
4592 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4593
4594 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4595 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4596 ) {
af39bc8c
AM
4597 if (float64_is_signaling_nan(a, status)
4598 || float64_is_signaling_nan(b, status)) {
ff32e16e 4599 float_raise(float_flag_invalid, status);
67b7861d
AJ
4600 }
4601 return 1;
4602 }
4603 return 0;
4604}
4605
158142c2
FB
4606/*----------------------------------------------------------------------------
4607| Returns the result of converting the extended double-precision floating-
4608| point value `a' to the 32-bit two's complement integer format. The
4609| conversion is performed according to the IEC/IEEE Standard for Binary
4610| Floating-Point Arithmetic---which means in particular that the conversion
4611| is rounded according to the current rounding mode. If `a' is a NaN, the
4612| largest positive integer is returned. Otherwise, if the conversion
4613| overflows, the largest integer with the same sign as `a' is returned.
4614*----------------------------------------------------------------------------*/
4615
f4014512 4616int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4617{
4618 flag aSign;
f4014512 4619 int32_t aExp, shiftCount;
bb98fe42 4620 uint64_t aSig;
158142c2 4621
d1eb8f2a
AD
4622 if (floatx80_invalid_encoding(a)) {
4623 float_raise(float_flag_invalid, status);
4624 return 1 << 31;
4625 }
158142c2
FB
4626 aSig = extractFloatx80Frac( a );
4627 aExp = extractFloatx80Exp( a );
4628 aSign = extractFloatx80Sign( a );
bb98fe42 4629 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4630 shiftCount = 0x4037 - aExp;
4631 if ( shiftCount <= 0 ) shiftCount = 1;
4632 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4633 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4634
4635}
4636
4637/*----------------------------------------------------------------------------
4638| Returns the result of converting the extended double-precision floating-
4639| point value `a' to the 32-bit two's complement integer format. The
4640| conversion is performed according to the IEC/IEEE Standard for Binary
4641| Floating-Point Arithmetic, except that the conversion is always rounded
4642| toward zero. If `a' is a NaN, the largest positive integer is returned.
4643| Otherwise, if the conversion overflows, the largest integer with the same
4644| sign as `a' is returned.
4645*----------------------------------------------------------------------------*/
4646
f4014512 4647int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4648{
4649 flag aSign;
f4014512 4650 int32_t aExp, shiftCount;
bb98fe42 4651 uint64_t aSig, savedASig;
b3a6a2e0 4652 int32_t z;
158142c2 4653
d1eb8f2a
AD
4654 if (floatx80_invalid_encoding(a)) {
4655 float_raise(float_flag_invalid, status);
4656 return 1 << 31;
4657 }
158142c2
FB
4658 aSig = extractFloatx80Frac( a );
4659 aExp = extractFloatx80Exp( a );
4660 aSign = extractFloatx80Sign( a );
4661 if ( 0x401E < aExp ) {
bb98fe42 4662 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4663 goto invalid;
4664 }
4665 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4666 if (aExp || aSig) {
4667 status->float_exception_flags |= float_flag_inexact;
4668 }
158142c2
FB
4669 return 0;
4670 }
4671 shiftCount = 0x403E - aExp;
4672 savedASig = aSig;
4673 aSig >>= shiftCount;
4674 z = aSig;
4675 if ( aSign ) z = - z;
4676 if ( ( z < 0 ) ^ aSign ) {
4677 invalid:
ff32e16e 4678 float_raise(float_flag_invalid, status);
bb98fe42 4679 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4680 }
4681 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4682 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4683 }
4684 return z;
4685
4686}
4687
4688/*----------------------------------------------------------------------------
4689| Returns the result of converting the extended double-precision floating-
4690| point value `a' to the 64-bit two's complement integer format. The
4691| conversion is performed according to the IEC/IEEE Standard for Binary
4692| Floating-Point Arithmetic---which means in particular that the conversion
4693| is rounded according to the current rounding mode. If `a' is a NaN,
4694| the largest positive integer is returned. Otherwise, if the conversion
4695| overflows, the largest integer with the same sign as `a' is returned.
4696*----------------------------------------------------------------------------*/
4697
f42c2224 4698int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4699{
4700 flag aSign;
f4014512 4701 int32_t aExp, shiftCount;
bb98fe42 4702 uint64_t aSig, aSigExtra;
158142c2 4703
d1eb8f2a
AD
4704 if (floatx80_invalid_encoding(a)) {
4705 float_raise(float_flag_invalid, status);
4706 return 1ULL << 63;
4707 }
158142c2
FB
4708 aSig = extractFloatx80Frac( a );
4709 aExp = extractFloatx80Exp( a );
4710 aSign = extractFloatx80Sign( a );
4711 shiftCount = 0x403E - aExp;
4712 if ( shiftCount <= 0 ) {
4713 if ( shiftCount ) {
ff32e16e 4714 float_raise(float_flag_invalid, status);
0f605c88 4715 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
4716 return LIT64( 0x7FFFFFFFFFFFFFFF );
4717 }
bb98fe42 4718 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4719 }
4720 aSigExtra = 0;
4721 }
4722 else {
4723 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4724 }
ff32e16e 4725 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4726
4727}
4728
4729/*----------------------------------------------------------------------------
4730| Returns the result of converting the extended double-precision floating-
4731| point value `a' to the 64-bit two's complement integer format. The
4732| conversion is performed according to the IEC/IEEE Standard for Binary
4733| Floating-Point Arithmetic, except that the conversion is always rounded
4734| toward zero. If `a' is a NaN, the largest positive integer is returned.
4735| Otherwise, if the conversion overflows, the largest integer with the same
4736| sign as `a' is returned.
4737*----------------------------------------------------------------------------*/
4738
f42c2224 4739int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4740{
4741 flag aSign;
f4014512 4742 int32_t aExp, shiftCount;
bb98fe42 4743 uint64_t aSig;
f42c2224 4744 int64_t z;
158142c2 4745
d1eb8f2a
AD
4746 if (floatx80_invalid_encoding(a)) {
4747 float_raise(float_flag_invalid, status);
4748 return 1ULL << 63;
4749 }
158142c2
FB
4750 aSig = extractFloatx80Frac( a );
4751 aExp = extractFloatx80Exp( a );
4752 aSign = extractFloatx80Sign( a );
4753 shiftCount = aExp - 0x403E;
4754 if ( 0 <= shiftCount ) {
4755 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4756 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4757 float_raise(float_flag_invalid, status);
158142c2
FB
4758 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4759 return LIT64( 0x7FFFFFFFFFFFFFFF );
4760 }
4761 }
bb98fe42 4762 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4763 }
4764 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4765 if (aExp | aSig) {
4766 status->float_exception_flags |= float_flag_inexact;
4767 }
158142c2
FB
4768 return 0;
4769 }
4770 z = aSig>>( - shiftCount );
bb98fe42 4771 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4772 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4773 }
4774 if ( aSign ) z = - z;
4775 return z;
4776
4777}
4778
4779/*----------------------------------------------------------------------------
4780| Returns the result of converting the extended double-precision floating-
4781| point value `a' to the single-precision floating-point format. The
4782| conversion is performed according to the IEC/IEEE Standard for Binary
4783| Floating-Point Arithmetic.
4784*----------------------------------------------------------------------------*/
4785
e5a41ffa 4786float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4787{
4788 flag aSign;
f4014512 4789 int32_t aExp;
bb98fe42 4790 uint64_t aSig;
158142c2 4791
d1eb8f2a
AD
4792 if (floatx80_invalid_encoding(a)) {
4793 float_raise(float_flag_invalid, status);
4794 return float32_default_nan(status);
4795 }
158142c2
FB
4796 aSig = extractFloatx80Frac( a );
4797 aExp = extractFloatx80Exp( a );
4798 aSign = extractFloatx80Sign( a );
4799 if ( aExp == 0x7FFF ) {
bb98fe42 4800 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4801 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4802 }
4803 return packFloat32( aSign, 0xFF, 0 );
4804 }
4805 shift64RightJamming( aSig, 33, &aSig );
4806 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 4807 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
4808
4809}
4810
4811/*----------------------------------------------------------------------------
4812| Returns the result of converting the extended double-precision floating-
4813| point value `a' to the double-precision floating-point format. The
4814| conversion is performed according to the IEC/IEEE Standard for Binary
4815| Floating-Point Arithmetic.
4816*----------------------------------------------------------------------------*/
4817
e5a41ffa 4818float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
4819{
4820 flag aSign;
f4014512 4821 int32_t aExp;
bb98fe42 4822 uint64_t aSig, zSig;
158142c2 4823
d1eb8f2a
AD
4824 if (floatx80_invalid_encoding(a)) {
4825 float_raise(float_flag_invalid, status);
4826 return float64_default_nan(status);
4827 }
158142c2
FB
4828 aSig = extractFloatx80Frac( a );
4829 aExp = extractFloatx80Exp( a );
4830 aSign = extractFloatx80Sign( a );
4831 if ( aExp == 0x7FFF ) {
bb98fe42 4832 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4833 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4834 }
4835 return packFloat64( aSign, 0x7FF, 0 );
4836 }
4837 shift64RightJamming( aSig, 1, &zSig );
4838 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 4839 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
4840
4841}
4842
158142c2
FB
4843/*----------------------------------------------------------------------------
4844| Returns the result of converting the extended double-precision floating-
4845| point value `a' to the quadruple-precision floating-point format. The
4846| conversion is performed according to the IEC/IEEE Standard for Binary
4847| Floating-Point Arithmetic.
4848*----------------------------------------------------------------------------*/
4849
e5a41ffa 4850float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
4851{
4852 flag aSign;
0c48262d 4853 int aExp;
bb98fe42 4854 uint64_t aSig, zSig0, zSig1;
158142c2 4855
d1eb8f2a
AD
4856 if (floatx80_invalid_encoding(a)) {
4857 float_raise(float_flag_invalid, status);
4858 return float128_default_nan(status);
4859 }
158142c2
FB
4860 aSig = extractFloatx80Frac( a );
4861 aExp = extractFloatx80Exp( a );
4862 aSign = extractFloatx80Sign( a );
bb98fe42 4863 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4864 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4865 }
4866 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4867 return packFloat128( aSign, aExp, zSig0, zSig1 );
4868
4869}
4870
0f721292
LV
4871/*----------------------------------------------------------------------------
4872| Rounds the extended double-precision floating-point value `a'
4873| to the precision provided by floatx80_rounding_precision and returns the
4874| result as an extended double-precision floating-point value.
4875| The operation is performed according to the IEC/IEEE Standard for Binary
4876| Floating-Point Arithmetic.
4877*----------------------------------------------------------------------------*/
4878
4879floatx80 floatx80_round(floatx80 a, float_status *status)
4880{
4881 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4882 extractFloatx80Sign(a),
4883 extractFloatx80Exp(a),
4884 extractFloatx80Frac(a), 0, status);
4885}
4886
158142c2
FB
4887/*----------------------------------------------------------------------------
4888| Rounds the extended double-precision floating-point value `a' to an integer,
4889| and returns the result as an extended double-precision floating-point
4890| value. The operation is performed according to the IEC/IEEE Standard for
4891| Binary Floating-Point Arithmetic.
4892*----------------------------------------------------------------------------*/
4893
e5a41ffa 4894floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
4895{
4896 flag aSign;
f4014512 4897 int32_t aExp;
bb98fe42 4898 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
4899 floatx80 z;
4900
d1eb8f2a
AD
4901 if (floatx80_invalid_encoding(a)) {
4902 float_raise(float_flag_invalid, status);
4903 return floatx80_default_nan(status);
4904 }
158142c2
FB
4905 aExp = extractFloatx80Exp( a );
4906 if ( 0x403E <= aExp ) {
bb98fe42 4907 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 4908 return propagateFloatx80NaN(a, a, status);
158142c2
FB
4909 }
4910 return a;
4911 }
4912 if ( aExp < 0x3FFF ) {
4913 if ( ( aExp == 0 )
bb98fe42 4914 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
4915 return a;
4916 }
a2f2d288 4917 status->float_exception_flags |= float_flag_inexact;
158142c2 4918 aSign = extractFloatx80Sign( a );
a2f2d288 4919 switch (status->float_rounding_mode) {
158142c2 4920 case float_round_nearest_even:
bb98fe42 4921 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
4922 ) {
4923 return
4924 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4925 }
4926 break;
f9288a76
PM
4927 case float_round_ties_away:
4928 if (aExp == 0x3FFE) {
4929 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4930 }
4931 break;
158142c2
FB
4932 case float_round_down:
4933 return
4934 aSign ?
4935 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4936 : packFloatx80( 0, 0, 0 );
4937 case float_round_up:
4938 return
4939 aSign ? packFloatx80( 1, 0, 0 )
4940 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4941 }
4942 return packFloatx80( aSign, 0, 0 );
4943 }
4944 lastBitMask = 1;
4945 lastBitMask <<= 0x403E - aExp;
4946 roundBitsMask = lastBitMask - 1;
4947 z = a;
a2f2d288 4948 switch (status->float_rounding_mode) {
dc355b76 4949 case float_round_nearest_even:
158142c2 4950 z.low += lastBitMask>>1;
dc355b76
PM
4951 if ((z.low & roundBitsMask) == 0) {
4952 z.low &= ~lastBitMask;
4953 }
4954 break;
f9288a76
PM
4955 case float_round_ties_away:
4956 z.low += lastBitMask >> 1;
4957 break;
dc355b76
PM
4958 case float_round_to_zero:
4959 break;
4960 case float_round_up:
4961 if (!extractFloatx80Sign(z)) {
4962 z.low += roundBitsMask;
4963 }
4964 break;
4965 case float_round_down:
4966 if (extractFloatx80Sign(z)) {
158142c2
FB
4967 z.low += roundBitsMask;
4968 }
dc355b76
PM
4969 break;
4970 default:
4971 abort();
158142c2
FB
4972 }
4973 z.low &= ~ roundBitsMask;
4974 if ( z.low == 0 ) {
4975 ++z.high;
4976 z.low = LIT64( 0x8000000000000000 );
4977 }
a2f2d288
PM
4978 if (z.low != a.low) {
4979 status->float_exception_flags |= float_flag_inexact;
4980 }
158142c2
FB
4981 return z;
4982
4983}
4984
4985/*----------------------------------------------------------------------------
4986| Returns the result of adding the absolute values of the extended double-
4987| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4988| negated before being returned. `zSign' is ignored if the result is a NaN.
4989| The addition is performed according to the IEC/IEEE Standard for Binary
4990| Floating-Point Arithmetic.
4991*----------------------------------------------------------------------------*/
4992
e5a41ffa
PM
4993static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4994 float_status *status)
158142c2 4995{
f4014512 4996 int32_t aExp, bExp, zExp;
bb98fe42 4997 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4998 int32_t expDiff;
158142c2
FB
4999
5000 aSig = extractFloatx80Frac( a );
5001 aExp = extractFloatx80Exp( a );
5002 bSig = extractFloatx80Frac( b );
5003 bExp = extractFloatx80Exp( b );
5004 expDiff = aExp - bExp;
5005 if ( 0 < expDiff ) {
5006 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5007 if ((uint64_t)(aSig << 1)) {
5008 return propagateFloatx80NaN(a, b, status);
5009 }
158142c2
FB
5010 return a;
5011 }
5012 if ( bExp == 0 ) --expDiff;
5013 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5014 zExp = aExp;
5015 }
5016 else if ( expDiff < 0 ) {
5017 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5018 if ((uint64_t)(bSig << 1)) {
5019 return propagateFloatx80NaN(a, b, status);
5020 }
0f605c88
LV
5021 return packFloatx80(zSign,
5022 floatx80_infinity_high,
5023 floatx80_infinity_low);
158142c2
FB
5024 }
5025 if ( aExp == 0 ) ++expDiff;
5026 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5027 zExp = bExp;
5028 }
5029 else {
5030 if ( aExp == 0x7FFF ) {
bb98fe42 5031 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5032 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5033 }
5034 return a;
5035 }
5036 zSig1 = 0;
5037 zSig0 = aSig + bSig;
5038 if ( aExp == 0 ) {
5039 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5040 goto roundAndPack;
5041 }
5042 zExp = aExp;
5043 goto shiftRight1;
5044 }
5045 zSig0 = aSig + bSig;
bb98fe42 5046 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5047 shiftRight1:
5048 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5049 zSig0 |= LIT64( 0x8000000000000000 );
5050 ++zExp;
5051 roundAndPack:
a2f2d288 5052 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5053 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5054}
5055
5056/*----------------------------------------------------------------------------
5057| Returns the result of subtracting the absolute values of the extended
5058| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5059| difference is negated before being returned. `zSign' is ignored if the
5060| result is a NaN. The subtraction is performed according to the IEC/IEEE
5061| Standard for Binary Floating-Point Arithmetic.
5062*----------------------------------------------------------------------------*/
5063
e5a41ffa
PM
5064static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5065 float_status *status)
158142c2 5066{
f4014512 5067 int32_t aExp, bExp, zExp;
bb98fe42 5068 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5069 int32_t expDiff;
158142c2
FB
5070
5071 aSig = extractFloatx80Frac( a );
5072 aExp = extractFloatx80Exp( a );
5073 bSig = extractFloatx80Frac( b );
5074 bExp = extractFloatx80Exp( b );
5075 expDiff = aExp - bExp;
5076 if ( 0 < expDiff ) goto aExpBigger;
5077 if ( expDiff < 0 ) goto bExpBigger;
5078 if ( aExp == 0x7FFF ) {
bb98fe42 5079 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5080 return propagateFloatx80NaN(a, b, status);
158142c2 5081 }
ff32e16e 5082 float_raise(float_flag_invalid, status);
af39bc8c 5083 return floatx80_default_nan(status);
158142c2
FB
5084 }
5085 if ( aExp == 0 ) {
5086 aExp = 1;
5087 bExp = 1;
5088 }
5089 zSig1 = 0;
5090 if ( bSig < aSig ) goto aBigger;
5091 if ( aSig < bSig ) goto bBigger;
a2f2d288 5092 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5093 bExpBigger:
5094 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5095 if ((uint64_t)(bSig << 1)) {
5096 return propagateFloatx80NaN(a, b, status);
5097 }
0f605c88
LV
5098 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5099 floatx80_infinity_low);
158142c2
FB
5100 }
5101 if ( aExp == 0 ) ++expDiff;
5102 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5103 bBigger:
5104 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5105 zExp = bExp;
5106 zSign ^= 1;
5107 goto normalizeRoundAndPack;
5108 aExpBigger:
5109 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5110 if ((uint64_t)(aSig << 1)) {
5111 return propagateFloatx80NaN(a, b, status);
5112 }
158142c2
FB
5113 return a;
5114 }
5115 if ( bExp == 0 ) --expDiff;
5116 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5117 aBigger:
5118 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5119 zExp = aExp;
5120 normalizeRoundAndPack:
a2f2d288 5121 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5122 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5123}
5124
5125/*----------------------------------------------------------------------------
5126| Returns the result of adding the extended double-precision floating-point
5127| values `a' and `b'. The operation is performed according to the IEC/IEEE
5128| Standard for Binary Floating-Point Arithmetic.
5129*----------------------------------------------------------------------------*/
5130
e5a41ffa 5131floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5132{
5133 flag aSign, bSign;
5134
d1eb8f2a
AD
5135 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5136 float_raise(float_flag_invalid, status);
5137 return floatx80_default_nan(status);
5138 }
158142c2
FB
5139 aSign = extractFloatx80Sign( a );
5140 bSign = extractFloatx80Sign( b );
5141 if ( aSign == bSign ) {
ff32e16e 5142 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5143 }
5144 else {
ff32e16e 5145 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5146 }
5147
5148}
5149
5150/*----------------------------------------------------------------------------
5151| Returns the result of subtracting the extended double-precision floating-
5152| point values `a' and `b'. The operation is performed according to the
5153| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5154*----------------------------------------------------------------------------*/
5155
e5a41ffa 5156floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5157{
5158 flag aSign, bSign;
5159
d1eb8f2a
AD
5160 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5161 float_raise(float_flag_invalid, status);
5162 return floatx80_default_nan(status);
5163 }
158142c2
FB
5164 aSign = extractFloatx80Sign( a );
5165 bSign = extractFloatx80Sign( b );
5166 if ( aSign == bSign ) {
ff32e16e 5167 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5168 }
5169 else {
ff32e16e 5170 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5171 }
5172
5173}
5174
5175/*----------------------------------------------------------------------------
5176| Returns the result of multiplying the extended double-precision floating-
5177| point values `a' and `b'. The operation is performed according to the
5178| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5179*----------------------------------------------------------------------------*/
5180
e5a41ffa 5181floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5182{
5183 flag aSign, bSign, zSign;
f4014512 5184 int32_t aExp, bExp, zExp;
bb98fe42 5185 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5186
d1eb8f2a
AD
5187 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5188 float_raise(float_flag_invalid, status);
5189 return floatx80_default_nan(status);
5190 }
158142c2
FB
5191 aSig = extractFloatx80Frac( a );
5192 aExp = extractFloatx80Exp( a );
5193 aSign = extractFloatx80Sign( a );
5194 bSig = extractFloatx80Frac( b );
5195 bExp = extractFloatx80Exp( b );
5196 bSign = extractFloatx80Sign( b );
5197 zSign = aSign ^ bSign;
5198 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5199 if ( (uint64_t) ( aSig<<1 )
5200 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5201 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5202 }
5203 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5204 return packFloatx80(zSign, floatx80_infinity_high,
5205 floatx80_infinity_low);
158142c2
FB
5206 }
5207 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5208 if ((uint64_t)(bSig << 1)) {
5209 return propagateFloatx80NaN(a, b, status);
5210 }
158142c2
FB
5211 if ( ( aExp | aSig ) == 0 ) {
5212 invalid:
ff32e16e 5213 float_raise(float_flag_invalid, status);
af39bc8c 5214 return floatx80_default_nan(status);
158142c2 5215 }
0f605c88
LV
5216 return packFloatx80(zSign, floatx80_infinity_high,
5217 floatx80_infinity_low);
158142c2
FB
5218 }
5219 if ( aExp == 0 ) {
5220 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5221 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5222 }
5223 if ( bExp == 0 ) {
5224 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5225 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5226 }
5227 zExp = aExp + bExp - 0x3FFE;
5228 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5229 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5230 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5231 --zExp;
5232 }
a2f2d288 5233 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5234 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5235}
5236
5237/*----------------------------------------------------------------------------
5238| Returns the result of dividing the extended double-precision floating-point
5239| value `a' by the corresponding value `b'. The operation is performed
5240| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5241*----------------------------------------------------------------------------*/
5242
e5a41ffa 5243floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5244{
5245 flag aSign, bSign, zSign;
f4014512 5246 int32_t aExp, bExp, zExp;
bb98fe42
AF
5247 uint64_t aSig, bSig, zSig0, zSig1;
5248 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 5249
d1eb8f2a
AD
5250 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5251 float_raise(float_flag_invalid, status);
5252 return floatx80_default_nan(status);
5253 }
158142c2
FB
5254 aSig = extractFloatx80Frac( a );
5255 aExp = extractFloatx80Exp( a );
5256 aSign = extractFloatx80Sign( a );
5257 bSig = extractFloatx80Frac( b );
5258 bExp = extractFloatx80Exp( b );
5259 bSign = extractFloatx80Sign( b );
5260 zSign = aSign ^ bSign;
5261 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5262 if ((uint64_t)(aSig << 1)) {
5263 return propagateFloatx80NaN(a, b, status);
5264 }
158142c2 5265 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5266 if ((uint64_t)(bSig << 1)) {
5267 return propagateFloatx80NaN(a, b, status);
5268 }
158142c2
FB
5269 goto invalid;
5270 }
0f605c88
LV
5271 return packFloatx80(zSign, floatx80_infinity_high,
5272 floatx80_infinity_low);
158142c2
FB
5273 }
5274 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5275 if ((uint64_t)(bSig << 1)) {
5276 return propagateFloatx80NaN(a, b, status);
5277 }
158142c2
FB
5278 return packFloatx80( zSign, 0, 0 );
5279 }
5280 if ( bExp == 0 ) {
5281 if ( bSig == 0 ) {
5282 if ( ( aExp | aSig ) == 0 ) {
5283 invalid:
ff32e16e 5284 float_raise(float_flag_invalid, status);
af39bc8c 5285 return floatx80_default_nan(status);
158142c2 5286 }
ff32e16e 5287 float_raise(float_flag_divbyzero, status);
0f605c88
LV
5288 return packFloatx80(zSign, floatx80_infinity_high,
5289 floatx80_infinity_low);
158142c2
FB
5290 }
5291 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5292 }
5293 if ( aExp == 0 ) {
5294 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5295 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5296 }
5297 zExp = aExp - bExp + 0x3FFE;
5298 rem1 = 0;
5299 if ( bSig <= aSig ) {
5300 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5301 ++zExp;
5302 }
5303 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5304 mul64To128( bSig, zSig0, &term0, &term1 );
5305 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 5306 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5307 --zSig0;
5308 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5309 }
5310 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 5311 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
5312 mul64To128( bSig, zSig1, &term1, &term2 );
5313 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 5314 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5315 --zSig1;
5316 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5317 }
5318 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5319 }
a2f2d288 5320 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5321 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5322}
5323
5324/*----------------------------------------------------------------------------
5325| Returns the remainder of the extended double-precision floating-point value
5326| `a' with respect to the corresponding value `b'. The operation is performed
5327| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5328*----------------------------------------------------------------------------*/
5329
e5a41ffa 5330floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 5331{
ed086f3d 5332 flag aSign, zSign;
f4014512 5333 int32_t aExp, bExp, expDiff;
bb98fe42
AF
5334 uint64_t aSig0, aSig1, bSig;
5335 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 5336
d1eb8f2a
AD
5337 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5338 float_raise(float_flag_invalid, status);
5339 return floatx80_default_nan(status);
5340 }
158142c2
FB
5341 aSig0 = extractFloatx80Frac( a );
5342 aExp = extractFloatx80Exp( a );
5343 aSign = extractFloatx80Sign( a );
5344 bSig = extractFloatx80Frac( b );
5345 bExp = extractFloatx80Exp( b );
158142c2 5346 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5347 if ( (uint64_t) ( aSig0<<1 )
5348 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5349 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5350 }
5351 goto invalid;
5352 }
5353 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5354 if ((uint64_t)(bSig << 1)) {
5355 return propagateFloatx80NaN(a, b, status);
5356 }
158142c2
FB
5357 return a;
5358 }
5359 if ( bExp == 0 ) {
5360 if ( bSig == 0 ) {
5361 invalid:
ff32e16e 5362 float_raise(float_flag_invalid, status);
af39bc8c 5363 return floatx80_default_nan(status);
158142c2
FB
5364 }
5365 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5366 }
5367 if ( aExp == 0 ) {
bb98fe42 5368 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
5369 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5370 }
5371 bSig |= LIT64( 0x8000000000000000 );
5372 zSign = aSign;
5373 expDiff = aExp - bExp;
5374 aSig1 = 0;
5375 if ( expDiff < 0 ) {
5376 if ( expDiff < -1 ) return a;
5377 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5378 expDiff = 0;
5379 }
5380 q = ( bSig <= aSig0 );
5381 if ( q ) aSig0 -= bSig;
5382 expDiff -= 64;
5383 while ( 0 < expDiff ) {
5384 q = estimateDiv128To64( aSig0, aSig1, bSig );
5385 q = ( 2 < q ) ? q - 2 : 0;
5386 mul64To128( bSig, q, &term0, &term1 );
5387 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5388 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5389 expDiff -= 62;
5390 }
5391 expDiff += 64;
5392 if ( 0 < expDiff ) {
5393 q = estimateDiv128To64( aSig0, aSig1, bSig );
5394 q = ( 2 < q ) ? q - 2 : 0;
5395 q >>= 64 - expDiff;
5396 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5397 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5398 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5399 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5400 ++q;
5401 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5402 }
5403 }
5404 else {
5405 term1 = 0;
5406 term0 = bSig;
5407 }
5408 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5409 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5410 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5411 && ( q & 1 ) )
5412 ) {
5413 aSig0 = alternateASig0;
5414 aSig1 = alternateASig1;
5415 zSign = ! zSign;
5416 }
5417 return
5418 normalizeRoundAndPackFloatx80(
ff32e16e 5419 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5420
5421}
5422
5423/*----------------------------------------------------------------------------
5424| Returns the square root of the extended double-precision floating-point
5425| value `a'. The operation is performed according to the IEC/IEEE Standard
5426| for Binary Floating-Point Arithmetic.
5427*----------------------------------------------------------------------------*/
5428
e5a41ffa 5429floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5430{
5431 flag aSign;
f4014512 5432 int32_t aExp, zExp;
bb98fe42
AF
5433 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5434 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5435
d1eb8f2a
AD
5436 if (floatx80_invalid_encoding(a)) {
5437 float_raise(float_flag_invalid, status);
5438 return floatx80_default_nan(status);
5439 }
158142c2
FB
5440 aSig0 = extractFloatx80Frac( a );
5441 aExp = extractFloatx80Exp( a );
5442 aSign = extractFloatx80Sign( a );
5443 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5444 if ((uint64_t)(aSig0 << 1)) {
5445 return propagateFloatx80NaN(a, a, status);
5446 }
158142c2
FB
5447 if ( ! aSign ) return a;
5448 goto invalid;
5449 }
5450 if ( aSign ) {
5451 if ( ( aExp | aSig0 ) == 0 ) return a;
5452 invalid:
ff32e16e 5453 float_raise(float_flag_invalid, status);
af39bc8c 5454 return floatx80_default_nan(status);
158142c2
FB
5455 }
5456 if ( aExp == 0 ) {
5457 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5458 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5459 }
5460 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5461 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5462 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5463 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5464 doubleZSig0 = zSig0<<1;
5465 mul64To128( zSig0, zSig0, &term0, &term1 );
5466 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5467 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5468 --zSig0;
5469 doubleZSig0 -= 2;
5470 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5471 }
5472 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5473 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5474 if ( zSig1 == 0 ) zSig1 = 1;
5475 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5476 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5477 mul64To128( zSig1, zSig1, &term2, &term3 );
5478 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5479 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5480 --zSig1;
5481 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5482 term3 |= 1;
5483 term2 |= doubleZSig0;
5484 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5485 }
5486 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5487 }
5488 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5489 zSig0 |= doubleZSig0;
a2f2d288
PM
5490 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5491 0, zExp, zSig0, zSig1, status);
158142c2
FB
5492}
5493
5494/*----------------------------------------------------------------------------
b689362d
AJ
5495| Returns 1 if the extended double-precision floating-point value `a' is equal
5496| to the corresponding value `b', and 0 otherwise. The invalid exception is
5497| raised if either operand is a NaN. Otherwise, the comparison is performed
5498| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5499*----------------------------------------------------------------------------*/
5500
e5a41ffa 5501int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5502{
5503
d1eb8f2a
AD
5504 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5505 || (extractFloatx80Exp(a) == 0x7FFF
5506 && (uint64_t) (extractFloatx80Frac(a) << 1))
5507 || (extractFloatx80Exp(b) == 0x7FFF
5508 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5509 ) {
ff32e16e 5510 float_raise(float_flag_invalid, status);
158142c2
FB
5511 return 0;
5512 }
5513 return
5514 ( a.low == b.low )
5515 && ( ( a.high == b.high )
5516 || ( ( a.low == 0 )
bb98fe42 5517 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5518 );
5519
5520}
5521
5522/*----------------------------------------------------------------------------
5523| Returns 1 if the extended double-precision floating-point value `a' is
5524| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5525| invalid exception is raised if either operand is a NaN. The comparison is
5526| performed according to the IEC/IEEE Standard for Binary Floating-Point
5527| Arithmetic.
158142c2
FB
5528*----------------------------------------------------------------------------*/
5529
e5a41ffa 5530int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5531{
5532 flag aSign, bSign;
5533
d1eb8f2a
AD
5534 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5535 || (extractFloatx80Exp(a) == 0x7FFF
5536 && (uint64_t) (extractFloatx80Frac(a) << 1))
5537 || (extractFloatx80Exp(b) == 0x7FFF
5538 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5539 ) {
ff32e16e 5540 float_raise(float_flag_invalid, status);
158142c2
FB
5541 return 0;
5542 }
5543 aSign = extractFloatx80Sign( a );
5544 bSign = extractFloatx80Sign( b );
5545 if ( aSign != bSign ) {
5546 return
5547 aSign
bb98fe42 5548 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5549 == 0 );
5550 }
5551 return
5552 aSign ? le128( b.high, b.low, a.high, a.low )
5553 : le128( a.high, a.low, b.high, b.low );
5554
5555}
5556
5557/*----------------------------------------------------------------------------
5558| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5559| less than the corresponding value `b', and 0 otherwise. The invalid
5560| exception is raised if either operand is a NaN. The comparison is performed
5561| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5562*----------------------------------------------------------------------------*/
5563
e5a41ffa 5564int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5565{
5566 flag aSign, bSign;
5567
d1eb8f2a
AD
5568 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5569 || (extractFloatx80Exp(a) == 0x7FFF
5570 && (uint64_t) (extractFloatx80Frac(a) << 1))
5571 || (extractFloatx80Exp(b) == 0x7FFF
5572 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5573 ) {
ff32e16e 5574 float_raise(float_flag_invalid, status);
158142c2
FB
5575 return 0;
5576 }
5577 aSign = extractFloatx80Sign( a );
5578 bSign = extractFloatx80Sign( b );
5579 if ( aSign != bSign ) {
5580 return
5581 aSign
bb98fe42 5582 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5583 != 0 );
5584 }
5585 return
5586 aSign ? lt128( b.high, b.low, a.high, a.low )
5587 : lt128( a.high, a.low, b.high, b.low );
5588
5589}
5590
67b7861d
AJ
5591/*----------------------------------------------------------------------------
5592| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5593| cannot be compared, and 0 otherwise. The invalid exception is raised if
5594| either operand is a NaN. The comparison is performed according to the
5595| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5596*----------------------------------------------------------------------------*/
e5a41ffa 5597int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5598{
d1eb8f2a
AD
5599 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5600 || (extractFloatx80Exp(a) == 0x7FFF
5601 && (uint64_t) (extractFloatx80Frac(a) << 1))
5602 || (extractFloatx80Exp(b) == 0x7FFF
5603 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5604 ) {
ff32e16e 5605 float_raise(float_flag_invalid, status);
67b7861d
AJ
5606 return 1;
5607 }
5608 return 0;
5609}
5610
158142c2 5611/*----------------------------------------------------------------------------
b689362d 5612| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5613| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5614| cause an exception. The comparison is performed according to the IEC/IEEE
5615| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5616*----------------------------------------------------------------------------*/
5617
e5a41ffa 5618int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5619{
5620
d1eb8f2a
AD
5621 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5622 float_raise(float_flag_invalid, status);
5623 return 0;
5624 }
158142c2 5625 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5626 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5627 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5628 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5629 ) {
af39bc8c
AM
5630 if (floatx80_is_signaling_nan(a, status)
5631 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5632 float_raise(float_flag_invalid, status);
b689362d 5633 }
158142c2
FB
5634 return 0;
5635 }
5636 return
5637 ( a.low == b.low )
5638 && ( ( a.high == b.high )
5639 || ( ( a.low == 0 )
bb98fe42 5640 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5641 );
5642
5643}
5644
5645/*----------------------------------------------------------------------------
5646| Returns 1 if the extended double-precision floating-point value `a' is less
5647| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5648| do not cause an exception. Otherwise, the comparison is performed according
5649| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5650*----------------------------------------------------------------------------*/
5651
e5a41ffa 5652int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5653{
5654 flag aSign, bSign;
5655
d1eb8f2a
AD
5656 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5657 float_raise(float_flag_invalid, status);
5658 return 0;
5659 }
158142c2 5660 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5661 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5662 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5663 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5664 ) {
af39bc8c
AM
5665 if (floatx80_is_signaling_nan(a, status)
5666 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5667 float_raise(float_flag_invalid, status);
158142c2
FB
5668 }
5669 return 0;
5670 }
5671 aSign = extractFloatx80Sign( a );
5672 bSign = extractFloatx80Sign( b );
5673 if ( aSign != bSign ) {
5674 return
5675 aSign
bb98fe42 5676 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5677 == 0 );
5678 }
5679 return
5680 aSign ? le128( b.high, b.low, a.high, a.low )
5681 : le128( a.high, a.low, b.high, b.low );
5682
5683}
5684
5685/*----------------------------------------------------------------------------
5686| Returns 1 if the extended double-precision floating-point value `a' is less
5687| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5688| an exception. Otherwise, the comparison is performed according to the
5689| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5690*----------------------------------------------------------------------------*/
5691
e5a41ffa 5692int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5693{
5694 flag aSign, bSign;
5695
d1eb8f2a
AD
5696 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5697 float_raise(float_flag_invalid, status);
5698 return 0;
5699 }
158142c2 5700 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5701 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5702 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5703 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5704 ) {
af39bc8c
AM
5705 if (floatx80_is_signaling_nan(a, status)
5706 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5707 float_raise(float_flag_invalid, status);
158142c2
FB
5708 }
5709 return 0;
5710 }
5711 aSign = extractFloatx80Sign( a );
5712 bSign = extractFloatx80Sign( b );
5713 if ( aSign != bSign ) {
5714 return
5715 aSign
bb98fe42 5716 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5717 != 0 );
5718 }
5719 return
5720 aSign ? lt128( b.high, b.low, a.high, a.low )
5721 : lt128( a.high, a.low, b.high, b.low );
5722
5723}
5724
67b7861d
AJ
5725/*----------------------------------------------------------------------------
5726| Returns 1 if the extended double-precision floating-point values `a' and `b'
5727| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5728| The comparison is performed according to the IEC/IEEE Standard for Binary
5729| Floating-Point Arithmetic.
5730*----------------------------------------------------------------------------*/
e5a41ffa 5731int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5732{
d1eb8f2a
AD
5733 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5734 float_raise(float_flag_invalid, status);
5735 return 1;
5736 }
67b7861d
AJ
5737 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5738 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5739 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5740 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5741 ) {
af39bc8c
AM
5742 if (floatx80_is_signaling_nan(a, status)
5743 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5744 float_raise(float_flag_invalid, status);
67b7861d
AJ
5745 }
5746 return 1;
5747 }
5748 return 0;
5749}
5750
158142c2
FB
5751/*----------------------------------------------------------------------------
5752| Returns the result of converting the quadruple-precision floating-point
5753| value `a' to the 32-bit two's complement integer format. The conversion
5754| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5755| Arithmetic---which means in particular that the conversion is rounded
5756| according to the current rounding mode. If `a' is a NaN, the largest
5757| positive integer is returned. Otherwise, if the conversion overflows, the
5758| largest integer with the same sign as `a' is returned.
5759*----------------------------------------------------------------------------*/
5760
f4014512 5761int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5762{
5763 flag aSign;
f4014512 5764 int32_t aExp, shiftCount;
bb98fe42 5765 uint64_t aSig0, aSig1;
158142c2
FB
5766
5767 aSig1 = extractFloat128Frac1( a );
5768 aSig0 = extractFloat128Frac0( a );
5769 aExp = extractFloat128Exp( a );
5770 aSign = extractFloat128Sign( a );
5771 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5772 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5773 aSig0 |= ( aSig1 != 0 );
5774 shiftCount = 0x4028 - aExp;
5775 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5776 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5777
5778}
5779
5780/*----------------------------------------------------------------------------
5781| Returns the result of converting the quadruple-precision floating-point
5782| value `a' to the 32-bit two's complement integer format. The conversion
5783| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5784| Arithmetic, except that the conversion is always rounded toward zero. If
5785| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5786| conversion overflows, the largest integer with the same sign as `a' is
5787| returned.
5788*----------------------------------------------------------------------------*/
5789
f4014512 5790int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5791{
5792 flag aSign;
f4014512 5793 int32_t aExp, shiftCount;
bb98fe42 5794 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5795 int32_t z;
158142c2
FB
5796
5797 aSig1 = extractFloat128Frac1( a );
5798 aSig0 = extractFloat128Frac0( a );
5799 aExp = extractFloat128Exp( a );
5800 aSign = extractFloat128Sign( a );
5801 aSig0 |= ( aSig1 != 0 );
5802 if ( 0x401E < aExp ) {
5803 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5804 goto invalid;
5805 }
5806 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5807 if (aExp || aSig0) {
5808 status->float_exception_flags |= float_flag_inexact;
5809 }
158142c2
FB
5810 return 0;
5811 }
5812 aSig0 |= LIT64( 0x0001000000000000 );
5813 shiftCount = 0x402F - aExp;
5814 savedASig = aSig0;
5815 aSig0 >>= shiftCount;
5816 z = aSig0;
5817 if ( aSign ) z = - z;
5818 if ( ( z < 0 ) ^ aSign ) {
5819 invalid:
ff32e16e 5820 float_raise(float_flag_invalid, status);
bb98fe42 5821 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5822 }
5823 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5824 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5825 }
5826 return z;
5827
5828}
5829
5830/*----------------------------------------------------------------------------
5831| Returns the result of converting the quadruple-precision floating-point
5832| value `a' to the 64-bit two's complement integer format. The conversion
5833| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5834| Arithmetic---which means in particular that the conversion is rounded
5835| according to the current rounding mode. If `a' is a NaN, the largest
5836| positive integer is returned. Otherwise, if the conversion overflows, the
5837| largest integer with the same sign as `a' is returned.
5838*----------------------------------------------------------------------------*/
5839
f42c2224 5840int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
5841{
5842 flag aSign;
f4014512 5843 int32_t aExp, shiftCount;
bb98fe42 5844 uint64_t aSig0, aSig1;
158142c2
FB
5845
5846 aSig1 = extractFloat128Frac1( a );
5847 aSig0 = extractFloat128Frac0( a );
5848 aExp = extractFloat128Exp( a );
5849 aSign = extractFloat128Sign( a );
5850 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5851 shiftCount = 0x402F - aExp;
5852 if ( shiftCount <= 0 ) {
5853 if ( 0x403E < aExp ) {
ff32e16e 5854 float_raise(float_flag_invalid, status);
158142c2
FB
5855 if ( ! aSign
5856 || ( ( aExp == 0x7FFF )
5857 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5858 )
5859 ) {
5860 return LIT64( 0x7FFFFFFFFFFFFFFF );
5861 }
bb98fe42 5862 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5863 }
5864 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5865 }
5866 else {
5867 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5868 }
ff32e16e 5869 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
5870
5871}
5872
5873/*----------------------------------------------------------------------------
5874| Returns the result of converting the quadruple-precision floating-point
5875| value `a' to the 64-bit two's complement integer format. The conversion
5876| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5877| Arithmetic, except that the conversion is always rounded toward zero.
5878| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5879| the conversion overflows, the largest integer with the same sign as `a' is
5880| returned.
5881*----------------------------------------------------------------------------*/
5882
f42c2224 5883int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
5884{
5885 flag aSign;
f4014512 5886 int32_t aExp, shiftCount;
bb98fe42 5887 uint64_t aSig0, aSig1;
f42c2224 5888 int64_t z;
158142c2
FB
5889
5890 aSig1 = extractFloat128Frac1( a );
5891 aSig0 = extractFloat128Frac0( a );
5892 aExp = extractFloat128Exp( a );
5893 aSign = extractFloat128Sign( a );
5894 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5895 shiftCount = aExp - 0x402F;
5896 if ( 0 < shiftCount ) {
5897 if ( 0x403E <= aExp ) {
5898 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5899 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5900 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
5901 if (aSig1) {
5902 status->float_exception_flags |= float_flag_inexact;
5903 }
158142c2
FB
5904 }
5905 else {
ff32e16e 5906 float_raise(float_flag_invalid, status);
158142c2
FB
5907 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5908 return LIT64( 0x7FFFFFFFFFFFFFFF );
5909 }
5910 }
bb98fe42 5911 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5912 }
5913 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 5914 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 5915 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5916 }
5917 }
5918 else {
5919 if ( aExp < 0x3FFF ) {
5920 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 5921 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5922 }
5923 return 0;
5924 }
5925 z = aSig0>>( - shiftCount );
5926 if ( aSig1
bb98fe42 5927 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 5928 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5929 }
5930 }
5931 if ( aSign ) z = - z;
5932 return z;
5933
5934}
5935
2e6d8568
BR
5936/*----------------------------------------------------------------------------
5937| Returns the result of converting the quadruple-precision floating-point value
5938| `a' to the 64-bit unsigned integer format. The conversion is
5939| performed according to the IEC/IEEE Standard for Binary Floating-Point
5940| Arithmetic---which means in particular that the conversion is rounded
5941| according to the current rounding mode. If `a' is a NaN, the largest
5942| positive integer is returned. If the conversion overflows, the
5943| largest unsigned integer is returned. If 'a' is negative, the value is
5944| rounded and zero is returned; negative values that do not round to zero
5945| will raise the invalid exception.
5946*----------------------------------------------------------------------------*/
5947
5948uint64_t float128_to_uint64(float128 a, float_status *status)
5949{
5950 flag aSign;
5951 int aExp;
5952 int shiftCount;
5953 uint64_t aSig0, aSig1;
5954
5955 aSig0 = extractFloat128Frac0(a);
5956 aSig1 = extractFloat128Frac1(a);
5957 aExp = extractFloat128Exp(a);
5958 aSign = extractFloat128Sign(a);
5959 if (aSign && (aExp > 0x3FFE)) {
5960 float_raise(float_flag_invalid, status);
5961 if (float128_is_any_nan(a)) {
5962 return LIT64(0xFFFFFFFFFFFFFFFF);
5963 } else {
5964 return 0;
5965 }
5966 }
5967 if (aExp) {
5968 aSig0 |= LIT64(0x0001000000000000);
5969 }
5970 shiftCount = 0x402F - aExp;
5971 if (shiftCount <= 0) {
5972 if (0x403E < aExp) {
5973 float_raise(float_flag_invalid, status);
5974 return LIT64(0xFFFFFFFFFFFFFFFF);
5975 }
5976 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5977 } else {
5978 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5979 }
5980 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5981}
5982
5983uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5984{
5985 uint64_t v;
5986 signed char current_rounding_mode = status->float_rounding_mode;
5987
5988 set_float_rounding_mode(float_round_to_zero, status);
5989 v = float128_to_uint64(a, status);
5990 set_float_rounding_mode(current_rounding_mode, status);
5991
5992 return v;
5993}
5994
158142c2
FB
5995/*----------------------------------------------------------------------------
5996| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
5997| value `a' to the 32-bit unsigned integer format. The conversion
5998| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5999| Arithmetic except that the conversion is always rounded toward zero.
6000| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6001| if the conversion overflows, the largest unsigned integer is returned.
6002| If 'a' is negative, the value is rounded and zero is returned; negative
6003| values that do not round to zero will raise the invalid exception.
6004*----------------------------------------------------------------------------*/
6005
6006uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6007{
6008 uint64_t v;
6009 uint32_t res;
6010 int old_exc_flags = get_float_exception_flags(status);
6011
6012 v = float128_to_uint64_round_to_zero(a, status);
6013 if (v > 0xffffffff) {
6014 res = 0xffffffff;
6015 } else {
6016 return v;
6017 }
6018 set_float_exception_flags(old_exc_flags, status);
6019 float_raise(float_flag_invalid, status);
6020 return res;
6021}
6022
6023/*----------------------------------------------------------------------------
6024| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6025| value `a' to the single-precision floating-point format. The conversion
6026| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6027| Arithmetic.
6028*----------------------------------------------------------------------------*/
6029
e5a41ffa 6030float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6031{
6032 flag aSign;
f4014512 6033 int32_t aExp;
bb98fe42
AF
6034 uint64_t aSig0, aSig1;
6035 uint32_t zSig;
158142c2
FB
6036
6037 aSig1 = extractFloat128Frac1( a );
6038 aSig0 = extractFloat128Frac0( a );
6039 aExp = extractFloat128Exp( a );
6040 aSign = extractFloat128Sign( a );
6041 if ( aExp == 0x7FFF ) {
6042 if ( aSig0 | aSig1 ) {
ff32e16e 6043 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6044 }
6045 return packFloat32( aSign, 0xFF, 0 );
6046 }
6047 aSig0 |= ( aSig1 != 0 );
6048 shift64RightJamming( aSig0, 18, &aSig0 );
6049 zSig = aSig0;
6050 if ( aExp || zSig ) {
6051 zSig |= 0x40000000;
6052 aExp -= 0x3F81;
6053 }
ff32e16e 6054 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6055
6056}
6057
6058/*----------------------------------------------------------------------------
6059| Returns the result of converting the quadruple-precision floating-point
6060| value `a' to the double-precision floating-point format. The conversion
6061| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6062| Arithmetic.
6063*----------------------------------------------------------------------------*/
6064
e5a41ffa 6065float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6066{
6067 flag aSign;
f4014512 6068 int32_t aExp;
bb98fe42 6069 uint64_t aSig0, aSig1;
158142c2
FB
6070
6071 aSig1 = extractFloat128Frac1( a );
6072 aSig0 = extractFloat128Frac0( a );
6073 aExp = extractFloat128Exp( a );
6074 aSign = extractFloat128Sign( a );
6075 if ( aExp == 0x7FFF ) {
6076 if ( aSig0 | aSig1 ) {
ff32e16e 6077 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6078 }
6079 return packFloat64( aSign, 0x7FF, 0 );
6080 }
6081 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6082 aSig0 |= ( aSig1 != 0 );
6083 if ( aExp || aSig0 ) {
6084 aSig0 |= LIT64( 0x4000000000000000 );
6085 aExp -= 0x3C01;
6086 }
ff32e16e 6087 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6088
6089}
6090
158142c2
FB
6091/*----------------------------------------------------------------------------
6092| Returns the result of converting the quadruple-precision floating-point
6093| value `a' to the extended double-precision floating-point format. The
6094| conversion is performed according to the IEC/IEEE Standard for Binary
6095| Floating-Point Arithmetic.
6096*----------------------------------------------------------------------------*/
6097
e5a41ffa 6098floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6099{
6100 flag aSign;
f4014512 6101 int32_t aExp;
bb98fe42 6102 uint64_t aSig0, aSig1;
158142c2
FB
6103
6104 aSig1 = extractFloat128Frac1( a );
6105 aSig0 = extractFloat128Frac0( a );
6106 aExp = extractFloat128Exp( a );
6107 aSign = extractFloat128Sign( a );
6108 if ( aExp == 0x7FFF ) {
6109 if ( aSig0 | aSig1 ) {
ff32e16e 6110 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 6111 }
0f605c88
LV
6112 return packFloatx80(aSign, floatx80_infinity_high,
6113 floatx80_infinity_low);
158142c2
FB
6114 }
6115 if ( aExp == 0 ) {
6116 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6117 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6118 }
6119 else {
6120 aSig0 |= LIT64( 0x0001000000000000 );
6121 }
6122 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6123 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6124
6125}
6126
158142c2
FB
6127/*----------------------------------------------------------------------------
6128| Rounds the quadruple-precision floating-point value `a' to an integer, and
6129| returns the result as a quadruple-precision floating-point value. The
6130| operation is performed according to the IEC/IEEE Standard for Binary
6131| Floating-Point Arithmetic.
|
| The rounding mode is read from status->float_rounding_mode.  The function
| works on the raw encoding and distinguishes three ranges of the exponent
| field: >= 0x402F (rounding position in the low word, or nothing to round),
| < 0x3FFF (result is a zero or has exponent field 0x3FFF with a zero
| fraction), and everything in between (rounding position in the high word).
| float_flag_inexact is set whenever the returned value differs from `a'.
6132*----------------------------------------------------------------------------*/
6133
e5a41ffa 6134float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
6135{
6136 flag aSign;
f4014512 6137 int32_t aExp;
bb98fe42 6138 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6139 float128 z;
6140
6141 aExp = extractFloat128Exp( a );
    /* Exponent field >= 0x402F: the bits to discard (if any) all live in
     * the low 64-bit word. */
6142 if ( 0x402F <= aExp ) {
        /* Exponent field >= 0x406F: nothing is rounded.  The input is
         * returned unchanged unless it is a NaN, which is handed to
         * propagateFloat128NaN. */
6143 if ( 0x406F <= aExp ) {
6144 if ( ( aExp == 0x7FFF )
6145 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6146 ) {
ff32e16e 6147 return propagateFloat128NaN(a, a, status);
158142c2
FB
6148 }
6149 return a;
6150 }
        /* The shift is done in two steps: for aExp == 0x402F the total is
         * 64, so lastBitMask becomes 0 and roundBitsMask becomes all ones.
         * The `else' arms below handle that case, where the last kept bit
         * is bit 0 of z.high. */
6151 lastBitMask = 1;
6152 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6153 roundBitsMask = lastBitMask - 1;
6154 z = a;
a2f2d288 6155 switch (status->float_rounding_mode) {
dc355b76 6156 case float_round_nearest_even:
158142c2
FB
6157 if ( lastBitMask ) {
            /* Add half of the last kept bit; if no discarded bits remain
             * set afterwards (a tie), clear the last kept bit. */
6158 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6159 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6160 }
6161 else {
            /* Whole low word is discarded: its top bit decides whether
             * z.high is incremented; if the rest of the low word is zero
             * (a tie), bit 0 of z.high is cleared. */
bb98fe42 6162 if ( (int64_t) z.low < 0 ) {
158142c2 6163 ++z.high;
bb98fe42 6164 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6165 }
6166 }
dc355b76 6167 break;
f9288a76
PM
6168 case float_round_ties_away:
        /* Same as nearest-even but without the tie correction. */
6169 if (lastBitMask) {
6170 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6171 } else {
6172 if ((int64_t) z.low < 0) {
6173 ++z.high;
6174 }
6175 }
6176 break;
dc355b76
PM
6177 case float_round_to_zero:
6178 break;
6179 case float_round_up:
        /* Only inputs with a clear sign bit are bumped. */
6180 if (!extractFloat128Sign(z)) {
6181 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6182 }
6183 break;
6184 case float_round_down:
        /* Only inputs with the sign bit set are bumped. */
6185 if (extractFloat128Sign(z)) {
6186 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6187 }
dc355b76
PM
6188 break;
6189 default:
6190 abort();
158142c2
FB
6191 }
        /* Drop the bits below the rounding position. */
6192 z.low &= ~ roundBitsMask;
6193 }
6194 else {
        /* Exponent field below 0x3FFF: the result is either a zero or a
         * value with exponent field 0x3FFF and a zero fraction. */
6195 if ( aExp < 0x3FFF ) {
            /* A zero of either sign is returned as is, with no flag. */
bb98fe42 6196 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6197 status->float_exception_flags |= float_flag_inexact;
158142c2 6198 aSign = extractFloat128Sign( a );
            /* This switch has no default: modes without a case here
             * (e.g. float_round_to_zero), and cases that `break', end up
             * at the signed-zero return after the switch. */
a2f2d288 6199 switch (status->float_rounding_mode) {
158142c2
FB
6200 case float_round_nearest_even:
            /* Exponent field 0x3FFE with a non-zero fraction rounds away
             * from zero; a zero fraction falls through to the signed zero. */
6201 if ( ( aExp == 0x3FFE )
6202 && ( extractFloat128Frac0( a )
6203 | extractFloat128Frac1( a ) )
6204 ) {
6205 return packFloat128( aSign, 0x3FFF, 0, 0 );
6206 }
6207 break;
f9288a76
PM
6208 case float_round_ties_away:
            /* Here exponent field 0x3FFE rounds away from zero regardless
             * of the fraction. */
6209 if (aExp == 0x3FFE) {
6210 return packFloat128(aSign, 0x3FFF, 0, 0);
6211 }
6212 break;
158142c2
FB
6213 case float_round_down:
6214 return
6215 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6216 : packFloat128( 0, 0, 0, 0 );
6217 case float_round_up:
6218 return
6219 aSign ? packFloat128( 1, 0, 0, 0 )
6220 : packFloat128( 0, 0x3FFF, 0, 0 );
6221 }
6222 return packFloat128( aSign, 0, 0, 0 );
6223 }
        /* Remaining exponent fields (0x3FFF..0x402E): the rounding position
         * is in the high word.  The low word is cleared in the result and
         * only contributes as a "non-zero" indicator. */
6224 lastBitMask = 1;
6225 lastBitMask <<= 0x402F - aExp;
6226 roundBitsMask = lastBitMask - 1;
6227 z.low = 0;
6228 z.high = a.high;
a2f2d288 6229 switch (status->float_rounding_mode) {
dc355b76 6230 case float_round_nearest_even:
158142c2
FB
6231 z.high += lastBitMask>>1;
        /* Tie: no discarded bit left in either word, so clear the last
         * kept bit. */
6232 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6233 z.high &= ~ lastBitMask;
6234 }
dc355b76 6235 break;
f9288a76
PM
6236 case float_round_ties_away:
6237 z.high += lastBitMask>>1;
6238 break;
dc355b76
PM
6239 case float_round_to_zero:
6240 break;
6241 case float_round_up:
6242 if (!extractFloat128Sign(z)) {
158142c2
FB
            /* Fold "low word non-zero" into bit 0 so that adding the mask
             * carries into the kept bits when anything is discarded. */
6243 z.high |= ( a.low != 0 );
6244 z.high += roundBitsMask;
6245 }
dc355b76
PM
6246 break;
6247 case float_round_down:
6248 if (extractFloat128Sign(z)) {
6249 z.high |= (a.low != 0);
6250 z.high += roundBitsMask;
6251 }
6252 break;
6253 default:
6254 abort();
158142c2
FB
6255 }
6256 z.high &= ~ roundBitsMask;
6257 }
    /* Any difference from the input means bits were discarded. */
6258 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 6259 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6260 }
6261 return z;
6262
6263}
6264
6265/*----------------------------------------------------------------------------
6266| Returns the result of adding the absolute values of the quadruple-precision
6267| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6268| before being returned. `zSign' is ignored if the result is a NaN.
6269| The addition is performed according to the IEC/IEEE Standard for Binary
6270| Floating-Point Arithmetic.
|
| The operand with the smaller exponent field is shifted right (with
| jamming into zSig2) to line up with the other one; the sum is then handed
| to roundAndPackFloat128.  When both exponent fields are zero the raw sum
| is packed directly, or replaced by a signed zero if status->flush_to_zero
| is set.
6271*----------------------------------------------------------------------------*/
6272
e5a41ffa
PM
6273static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6274 float_status *status)
158142c2 6275{
f4014512 6276 int32_t aExp, bExp, zExp;
bb98fe42 6277 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6278 int32_t expDiff;
158142c2
FB
6279
6280 aSig1 = extractFloat128Frac1( a );
6281 aSig0 = extractFloat128Frac0( a );
6282 aExp = extractFloat128Exp( a );
6283 bSig1 = extractFloat128Frac1( b );
6284 bSig0 = extractFloat128Frac0( b );
6285 bExp = extractFloat128Exp( b );
6286 expDiff = aExp - bExp;
    /* Case 1: a has the larger exponent field; b is aligned to it. */
6287 if ( 0 < expDiff ) {
6288 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6289 if (aSig0 | aSig1) {
6290 return propagateFloat128NaN(a, b, status);
6291 }
158142c2
FB
        /* a is an infinity and b is finite here: a is the result. */
6292 return a;
6293 }
        /* With a zero exponent field no leading bit is OR-ed into b; the
         * shift distance is reduced by one instead. */
6294 if ( bExp == 0 ) {
6295 --expDiff;
6296 }
6297 else {
6298 bSig0 |= LIT64( 0x0001000000000000 );
6299 }
6300 shift128ExtraRightJamming(
6301 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6302 zExp = aExp;
6303 }
    /* Case 2: b has the larger exponent field; mirror image of case 1. */
6304 else if ( expDiff < 0 ) {
6305 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6306 if (bSig0 | bSig1) {
6307 return propagateFloat128NaN(a, b, status);
6308 }
158142c2
FB
        /* b is an infinity: the result is an infinity with sign zSign. */
6309 return packFloat128( zSign, 0x7FFF, 0, 0 );
6310 }
6311 if ( aExp == 0 ) {
6312 ++expDiff;
6313 }
6314 else {
6315 aSig0 |= LIT64( 0x0001000000000000 );
6316 }
6317 shift128ExtraRightJamming(
6318 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6319 zExp = bExp;
6320 }
    /* Case 3: equal exponent fields; no alignment shift is needed. */
6321 else {
6322 if ( aExp == 0x7FFF ) {
6323 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6324 return propagateFloat128NaN(a, b, status);
158142c2
FB
6325 }
6326 return a;
6327 }
6328 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
        /* Both exponent fields are zero: the raw fraction sum is packed
         * with exponent field 0.  Under flush_to_zero a signed zero is
         * returned instead, raising float_flag_output_denormal when the
         * sum was non-zero. */
fe76d976 6329 if ( aExp == 0 ) {
a2f2d288 6330 if (status->flush_to_zero) {
e6afc87f 6331 if (zSig0 | zSig1) {
ff32e16e 6332 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
6333 }
6334 return packFloat128(zSign, 0, 0, 0);
6335 }
fe76d976
PB
6336 return packFloat128( zSign, 0, zSig0, zSig1 );
6337 }
158142c2
FB
        /* Neither leading bit was OR-ed in on this path; 0x0002... is
         * twice the 0x0001... leading-bit constant used elsewhere. */
6338 zSig2 = 0;
6339 zSig0 |= LIT64( 0x0002000000000000 );
6340 zExp = aExp;
6341 goto shiftRight1;
6342 }
    /* Reached only from cases 1 and 2 (case 3 returns or jumps).
     * NOTE(review): in case 2 aSig0 has already been shifted right, so this
     * OR appears to stand in for b's leading bit in the sum -- confirm. */
6343 aSig0 |= LIT64( 0x0001000000000000 );
6344 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
    /* Try the exponent one lower first; if the sum reached the
     * 0x0002... bit, restore the exponent and shift right by one. */
6345 --zExp;
6346 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6347 ++zExp;
6348 shiftRight1:
6349 shift128ExtraRightJamming(
6350 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6351 roundAndPack:
ff32e16e 6352 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6353
6354}
6355
6356/*----------------------------------------------------------------------------
6357| Returns the result of subtracting the absolute values of the quadruple-
6358| precision floating-point values `a' and `b'. If `zSign' is 1, the
6359| difference is negated before being returned. `zSign' is ignored if the
6360| result is a NaN. The subtraction is performed according to the IEC/IEEE
6361| Standard for Binary Floating-Point Arithmetic.
|
| Both fractions are pre-shifted left by 14 bits; the exponent passed to
| normalizeRoundAndPackFloat128 is reduced by 14 to compensate.  The smaller
| magnitude is always subtracted from the larger one, and `zSign' is flipped
| when `b' is the larger.  An exact zero difference is returned with the sign
| bit set only when the rounding mode is float_round_down.
6362*----------------------------------------------------------------------------*/
6363
e5a41ffa
PM
6364static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6365 float_status *status)
158142c2 6366{
f4014512 6367 int32_t aExp, bExp, zExp;
bb98fe42 6368 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 6369 int32_t expDiff;
158142c2
FB
6370
6371 aSig1 = extractFloat128Frac1( a );
6372 aSig0 = extractFloat128Frac0( a );
6373 aExp = extractFloat128Exp( a );
6374 bSig1 = extractFloat128Frac1( b );
6375 bSig0 = extractFloat128Frac0( b );
6376 bExp = extractFloat128Exp( b );
6377 expDiff = aExp - bExp;
    /* Pre-shift by 14; from here on the leading-bit constant is
     * 0x4000000000000000 (0x0001000000000000 << 14). */
6378 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6379 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6380 if ( 0 < expDiff ) goto aExpBigger;
6381 if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponent fields from here to the bExpBigger label. */
6382 if ( aExp == 0x7FFF ) {
6383 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6384 return propagateFloat128NaN(a, b, status);
158142c2 6385 }
        /* Both operands are infinities: invalid, default NaN. */
ff32e16e 6386 float_raise(float_flag_invalid, status);
af39bc8c 6387 return float128_default_nan(status);
158142c2
FB
6388 }
    /* Both exponent fields are zero: use 1 as the working exponent for
     * both; no leading bit is OR-ed in on this path. */
6389 if ( aExp == 0 ) {
6390 aExp = 1;
6391 bExp = 1;
6392 }
    /* Decide which fraction is larger, high word first. */
6393 if ( bSig0 < aSig0 ) goto aBigger;
6394 if ( aSig0 < bSig0 ) goto bBigger;
6395 if ( bSig1 < aSig1 ) goto aBigger;
6396 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
    /* Exactly equal: zero, with the sign bit set only for round-down. */
6397 return packFloat128(status->float_rounding_mode == float_round_down,
6398 0, 0, 0);
158142c2
FB
6399 bExpBigger:
6400 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6401 if (bSig0 | bSig1) {
6402 return propagateFloat128NaN(a, b, status);
6403 }
158142c2
FB
        /* b is an infinity: result is an infinity with the flipped sign. */
6404 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6405 }
    /* With a zero exponent field no leading bit is OR-ed into a; the
     * shift distance is shortened by one instead. */
6406 if ( aExp == 0 ) {
6407 ++expDiff;
6408 }
6409 else {
6410 aSig0 |= LIT64( 0x4000000000000000 );
6411 }
6412 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6413 bSig0 |= LIT64( 0x4000000000000000 );
6414 bBigger:
    /* b - a, and the result sign is inverted. */
6415 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6416 zExp = bExp;
6417 zSign ^= 1;
6418 goto normalizeRoundAndPack;
6419 aExpBigger:
6420 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6421 if (aSig0 | aSig1) {
6422 return propagateFloat128NaN(a, b, status);
6423 }
158142c2
FB
6424 return a;
6425 }
6426 if ( bExp == 0 ) {
6427 --expDiff;
6428 }
6429 else {
6430 bSig0 |= LIT64( 0x4000000000000000 );
6431 }
6432 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6433 aSig0 |= LIT64( 0x4000000000000000 );
6434 aBigger:
6435 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6436 zExp = aExp;
6437 normalizeRoundAndPack:
    /* The exponent is lowered by one here and by a further 14 in the call,
     * the latter matching the 14-bit pre-shift above. */
6438 --zExp;
ff32e16e
PM
6439 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6440 status);
158142c2
FB
6441
6442}
6443
6444/*----------------------------------------------------------------------------
6445| Returns the result of adding the quadruple-precision floating-point values
6446| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6447| for Binary Floating-Point Arithmetic.
6448*----------------------------------------------------------------------------*/
6449
e5a41ffa 6450float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6451{
6452 flag aSign, bSign;
6453
6454 aSign = extractFloat128Sign( a );
6455 bSign = extractFloat128Sign( b );
6456 if ( aSign == bSign ) {
ff32e16e 6457 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6458 }
6459 else {
ff32e16e 6460 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6461 }
6462
6463}
6464
6465/*----------------------------------------------------------------------------
6466| Returns the result of subtracting the quadruple-precision floating-point
6467| values `a' and `b'. The operation is performed according to the IEC/IEEE
6468| Standard for Binary Floating-Point Arithmetic.
6469*----------------------------------------------------------------------------*/
6470
e5a41ffa 6471float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6472{
6473 flag aSign, bSign;
6474
6475 aSign = extractFloat128Sign( a );
6476 bSign = extractFloat128Sign( b );
6477 if ( aSign == bSign ) {
ff32e16e 6478 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6479 }
6480 else {
ff32e16e 6481 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6482 }
6483
6484}
6485
6486/*----------------------------------------------------------------------------
6487| Returns the result of multiplying the quadruple-precision floating-point
6488| values `a' and `b'. The operation is performed according to the IEC/IEEE
6489| Standard for Binary Floating-Point Arithmetic.
6490*----------------------------------------------------------------------------*/
6491
e5a41ffa 6492float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6493{
6494 flag aSign, bSign, zSign;
f4014512 6495 int32_t aExp, bExp, zExp;
bb98fe42 6496 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6497
6498 aSig1 = extractFloat128Frac1( a );
6499 aSig0 = extractFloat128Frac0( a );
6500 aExp = extractFloat128Exp( a );
6501 aSign = extractFloat128Sign( a );
6502 bSig1 = extractFloat128Frac1( b );
6503 bSig0 = extractFloat128Frac0( b );
6504 bExp = extractFloat128Exp( b );
6505 bSign = extractFloat128Sign( b );
6506 zSign = aSign ^ bSign;
6507 if ( aExp == 0x7FFF ) {
6508 if ( ( aSig0 | aSig1 )
6509 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6510 return propagateFloat128NaN(a, b, status);
158142c2
FB
6511 }
6512 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6513 return packFloat128( zSign, 0x7FFF, 0, 0 );
6514 }
6515 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6516 if (bSig0 | bSig1) {
6517 return propagateFloat128NaN(a, b, status);
6518 }
158142c2
FB
6519 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6520 invalid:
ff32e16e 6521 float_raise(float_flag_invalid, status);
af39bc8c 6522 return float128_default_nan(status);
158142c2
FB
6523 }
6524 return packFloat128( zSign, 0x7FFF, 0, 0 );
6525 }
6526 if ( aExp == 0 ) {
6527 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6528 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6529 }
6530 if ( bExp == 0 ) {
6531 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6532 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6533 }
6534 zExp = aExp + bExp - 0x4000;
6535 aSig0 |= LIT64( 0x0001000000000000 );
6536 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6537 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6538 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6539 zSig2 |= ( zSig3 != 0 );
6540 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6541 shift128ExtraRightJamming(
6542 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6543 ++zExp;
6544 }
ff32e16e 6545 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6546
6547}
6548
6549/*----------------------------------------------------------------------------
6550| Returns the result of dividing the quadruple-precision floating-point value
6551| `a' by the corresponding value `b'. The operation is performed according to
6552| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
| Special cases handled before the division proper: NaN operands are
| propagated; infinity/infinity and zero/zero raise the invalid exception and
| return the default NaN; a non-zero finite value divided by zero raises
| float_flag_divbyzero and returns a signed infinity; infinity/finite returns
| a signed infinity; finite/infinity and zero/non-zero return a signed zero.
| The quotient is built as two 64-bit words, each obtained from
| estimateDiv128To64 and then corrected downwards using the exact remainder.
6553*----------------------------------------------------------------------------*/
6554
e5a41ffa 6555float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6556{
6557 flag aSign, bSign, zSign;
f4014512 6558 int32_t aExp, bExp, zExp;
bb98fe42
AF
6559 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6560 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6561
6562 aSig1 = extractFloat128Frac1( a );
6563 aSig0 = extractFloat128Frac0( a );
6564 aExp = extractFloat128Exp( a );
6565 aSign = extractFloat128Sign( a );
6566 bSig1 = extractFloat128Frac1( b );
6567 bSig0 = extractFloat128Frac0( b );
6568 bExp = extractFloat128Exp( b );
6569 bSign = extractFloat128Sign( b );
6570 zSign = aSign ^ bSign;
    /* a is a NaN or an infinity. */
6571 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6572 if (aSig0 | aSig1) {
6573 return propagateFloat128NaN(a, b, status);
6574 }
158142c2 6575 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6576 if (bSig0 | bSig1) {
6577 return propagateFloat128NaN(a, b, status);
6578 }
158142c2
FB
        /* infinity / infinity */
6579 goto invalid;
6580 }
6581 return packFloat128( zSign, 0x7FFF, 0, 0 );
6582 }
    /* a is finite; b is a NaN or an infinity. */
6583 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6584 if (bSig0 | bSig1) {
6585 return propagateFloat128NaN(a, b, status);
6586 }
158142c2
FB
6587 return packFloat128( zSign, 0, 0, 0 );
6588 }
6589 if ( bExp == 0 ) {
6590 if ( ( bSig0 | bSig1 ) == 0 ) {
            /* Divisor is zero: 0/0 is invalid, anything else is a
             * divide-by-zero yielding a signed infinity. */
6591 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6592 invalid:
ff32e16e 6593 float_raise(float_flag_invalid, status);
af39bc8c 6594 return float128_default_nan(status);
158142c2 6595 }
ff32e16e 6596 float_raise(float_flag_divbyzero, status);
158142c2
FB
6597 return packFloat128( zSign, 0x7FFF, 0, 0 );
6598 }
6599 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6600 }
6601 if ( aExp == 0 ) {
6602 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6603 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6604 }
6605 zExp = aExp - bExp + 0x3FFD;
    /* OR in the leading bit of each significand and shift both left by 15. */
6606 shortShift128Left(
6607 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6608 shortShift128Left(
6609 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
    /* If b <= a, halve a and bump the exponent so that a < b below. */
6610 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6611 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6612 ++zExp;
6613 }
    /* High quotient word: estimate, compute the exact remainder, and step
     * the estimate down while that remainder is negative. */
6614 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6615 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6616 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6617 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6618 --zSig0;
6619 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6620 }
    /* Low quotient word.  The exact remainder is only computed when the
     * low 14 bits of the estimate are <= 4; otherwise the estimate is used
     * as is.  NOTE(review): presumably the estimate's error bound makes the
     * correction irrelevant to rounding in that case -- confirm against
     * estimateDiv128To64. */
6621 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6622 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6623 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6624 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6625 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6626 --zSig1;
6627 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6628 }
        /* A non-zero final remainder is recorded as a sticky bit. */
6629 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6630 }
    /* Shift the quotient right by 15 (jamming lost bits into zSig2)
     * before rounding and packing. */
6631 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6632 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6633
6634}
6635
6636/*----------------------------------------------------------------------------
6637| Returns the remainder of the quadruple-precision floating-point value `a'
6638| with respect to the corresponding value `b'. The operation is performed
6639| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
| Special cases: NaN operands are propagated; an infinite `a' or a zero `b'
| raises the invalid exception and returns the default NaN; an infinite `b'
| or a zero `a' returns `a' unchanged, as does any `a' whose (normalized)
| exponent is more than one below that of `b'.  The sign of `b' is never
| read; the result sign is derived from the sign of `a' and from which of
| the two final remainder candidates is chosen.
6640*----------------------------------------------------------------------------*/
6641
e5a41ffa 6642float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6643{
ed086f3d 6644 flag aSign, zSign;
f4014512 6645 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6646 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6647 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6648 int64_t sigMean0;
158142c2
FB
6649
6650 aSig1 = extractFloat128Frac1( a );
6651 aSig0 = extractFloat128Frac0( a );
6652 aExp = extractFloat128Exp( a );
6653 aSign = extractFloat128Sign( a );
6654 bSig1 = extractFloat128Frac1( b );
6655 bSig0 = extractFloat128Frac0( b );
6656 bExp = extractFloat128Exp( b );
158142c2
FB
6657 if ( aExp == 0x7FFF ) {
6658 if ( ( aSig0 | aSig1 )
6659 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6660 return propagateFloat128NaN(a, b, status);
158142c2
FB
6661 }
        /* a is an infinity and b is not a NaN. */
6662 goto invalid;
6663 }
6664 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6665 if (bSig0 | bSig1) {
6666 return propagateFloat128NaN(a, b, status);
6667 }
158142c2
FB
        /* b is an infinity and a is finite: a is the result. */
6668 return a;
6669 }
6670 if ( bExp == 0 ) {
6671 if ( ( bSig0 | bSig1 ) == 0 ) {
6672 invalid:
ff32e16e 6673 float_raise(float_flag_invalid, status);
af39bc8c 6674 return float128_default_nan(status);
158142c2
FB
6675 }
6676 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6677 }
6678 if ( aExp == 0 ) {
6679 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6680 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6681 }
6682 expDiff = aExp - bExp;
6683 if ( expDiff < -1 ) return a;
    /* OR in the leading bits and left-justify both significands; a is
     * shifted one bit less when its exponent is one below b's. */
6684 shortShift128Left(
6685 aSig0 | LIT64( 0x0001000000000000 ),
6686 aSig1,
6687 15 - ( expDiff < 0 ),
6688 &aSig0,
6689 &aSig1
6690 );
6691 shortShift128Left(
6692 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
    /* q starts as 1 if b <= a (and b is subtracted once), else 0. */
6693 q = le128( bSig0, bSig1, aSig0, aSig1 );
6694 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6695 expDiff -= 64;
    /* Main reduction loop: each pass lowers expDiff by 61.  The quotient
     * estimate is reduced by 4 (clamped at 0).
     * NOTE(review): presumably so that it never exceeds the true quotient
     * -- confirm against estimateDiv128To64. */
6696 while ( 0 < expDiff ) {
6697 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6698 q = ( 4 < q ) ? q - 4 : 0;
6699 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6700 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6701 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6702 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6703 expDiff -= 61;
6704 }
    /* Final partial step; the quotient estimate is shifted right by
     * -expDiff before use. */
6705 if ( -64 < expDiff ) {
6706 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6707 q = ( 4 < q ) ? q - 4 : 0;
6708 q >>= - expDiff;
6709 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6710 expDiff += 52;
6711 if ( expDiff < 0 ) {
6712 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6713 }
6714 else {
6715 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6716 }
6717 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6718 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6719 }
6720 else {
6721 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6722 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6723 }
    /* Keep subtracting b until the remainder goes negative.  On exit
     * alternateASig holds the last non-negative remainder and aSig the
     * first negative one; q counts the subtractions. */
6724 do {
6725 alternateASig0 = aSig0;
6726 alternateASig1 = aSig1;
6727 ++q;
6728 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6729 } while ( 0 <= (int64_t) aSig0 );
    /* sigMean is the sum of the two candidates.  Its high word is declared
     * signed so that its sign can be tested; add128 writes it through a
     * uint64_t pointer cast. */
158142c2 6730 add128(
bb98fe42 6731 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
    /* Pick the non-negative candidate when the sum is negative, or when
     * the sum is exactly zero and q is odd; otherwise keep the negative
     * one. */
6732 if ( ( sigMean0 < 0 )
6733 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6734 aSig0 = alternateASig0;
6735 aSig1 = alternateASig1;
6736 }
    /* A negative remainder is negated and flips the sign of the result
     * relative to the sign of a. */
bb98fe42 6737 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6738 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6739 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6740 status);
158142c2
FB
6741}
6742
6743/*----------------------------------------------------------------------------
6744| Returns the square root of the quadruple-precision floating-point value `a'.
6745| The operation is performed according to the IEC/IEEE Standard for Binary
6746| Floating-Point Arithmetic.
|
| Special cases: a NaN is propagated; an infinity with a clear sign bit and
| zeros of either sign are returned as they are; any other input with the
| sign bit set raises the invalid exception and returns the default NaN.
| The root is built as two 64-bit words, each estimated first and then
| corrected downwards using the exact remainder; the result is always packed
| with sign 0.
6747*----------------------------------------------------------------------------*/
6748
e5a41ffa 6749float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6750{
6751 flag aSign;
f4014512 6752 int32_t aExp, zExp;
bb98fe42
AF
6753 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6754 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6755
6756 aSig1 = extractFloat128Frac1( a );
6757 aSig0 = extractFloat128Frac0( a );
6758 aExp = extractFloat128Exp( a );
6759 aSign = extractFloat128Sign( a );
6760 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6761 if (aSig0 | aSig1) {
6762 return propagateFloat128NaN(a, a, status);
6763 }
158142c2
FB
        /* Infinity: returned as is when the sign bit is clear, otherwise
         * invalid. */
6764 if ( ! aSign ) return a;
6765 goto invalid;
6766 }
6767 if ( aSign ) {
        /* A zero with the sign bit set is returned unchanged; every other
         * input with the sign bit set is invalid. */
6768 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6769 invalid:
ff32e16e 6770 float_raise(float_flag_invalid, status);
af39bc8c 6771 return float128_default_nan(status);
158142c2
FB
6772 }
6773 if ( aExp == 0 ) {
6774 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6775 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6776 }
    /* Result exponent: half of (aExp - 0x3FFF), offset by 0x3FFE. */
6777 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6778 aSig0 |= LIT64( 0x0001000000000000 );
    /* First estimate from estimateSqrt32; the significand is then shifted
     * left by 13, or by 12 when aExp is odd, and the estimate is refined
     * with one estimateDiv128To64 step. */
6779 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6780 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6781 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6782 doubleZSig0 = zSig0<<1;
    /* Exact remainder a - zSig0^2; step zSig0 down while it is negative. */
6783 mul64To128( zSig0, zSig0, &term0, &term1 );
6784 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6785 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6786 --zSig0;
6787 doubleZSig0 -= 2;
6788 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6789 }
    /* Low word of the root.  The exact remainder is only computed when the
     * low 13 bits of the estimate are <= 5; otherwise the estimate is used
     * as is.  NOTE(review): presumably justified by the error bound of
     * estimateDiv128To64 -- confirm. */
6790 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6791 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6792 if ( zSig1 == 0 ) zSig1 = 1;
6793 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6794 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6795 mul64To128( zSig1, zSig1, &term2, &term3 );
6796 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6797 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6798 --zSig1;
6799 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6800 term3 |= 1;
6801 term2 |= doubleZSig0;
6802 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6803 }
        /* A non-zero final remainder is recorded as a sticky bit. */
6804 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6805 }
    /* Shift the root right by 14 (jamming lost bits into zSig2) before
     * rounding and packing. */
6806 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6807 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6808
6809}
6810
6811/*----------------------------------------------------------------------------
6812| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6813| the corresponding value `b', and 0 otherwise. The invalid exception is
6814| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6815| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6816*----------------------------------------------------------------------------*/
6817
e5a41ffa 6818int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6819{
6820
6821 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6822 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6823 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6824 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6825 ) {
ff32e16e 6826 float_raise(float_flag_invalid, status);
158142c2
FB
6827 return 0;
6828 }
6829 return
6830 ( a.low == b.low )
6831 && ( ( a.high == b.high )
6832 || ( ( a.low == 0 )
bb98fe42 6833 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6834 );
6835
6836}
6837
6838/*----------------------------------------------------------------------------
6839| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6840| or equal to the corresponding value `b', and 0 otherwise. The invalid
6841| exception is raised if either operand is a NaN. The comparison is performed
6842| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6843*----------------------------------------------------------------------------*/
6844
e5a41ffa 6845int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6846{
6847 flag aSign, bSign;
6848
6849 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6850 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6851 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6852 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6853 ) {
ff32e16e 6854 float_raise(float_flag_invalid, status);
158142c2
FB
6855 return 0;
6856 }
6857 aSign = extractFloat128Sign( a );
6858 bSign = extractFloat128Sign( b );
6859 if ( aSign != bSign ) {
6860 return
6861 aSign
bb98fe42 6862 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6863 == 0 );
6864 }
6865 return
6866 aSign ? le128( b.high, b.low, a.high, a.low )
6867 : le128( a.high, a.low, b.high, b.low );
6868
6869}
6870
6871/*----------------------------------------------------------------------------
6872| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6873| the corresponding value `b', and 0 otherwise. The invalid exception is
6874| raised if either operand is a NaN. The comparison is performed according
6875| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6876*----------------------------------------------------------------------------*/
6877
e5a41ffa 6878int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6879{
6880 flag aSign, bSign;
6881
6882 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6883 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6884 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6885 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6886 ) {
ff32e16e 6887 float_raise(float_flag_invalid, status);
158142c2
FB
6888 return 0;
6889 }
6890 aSign = extractFloat128Sign( a );
6891 bSign = extractFloat128Sign( b );
6892 if ( aSign != bSign ) {
6893 return
6894 aSign
bb98fe42 6895 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6896 != 0 );
6897 }
6898 return
6899 aSign ? lt128( b.high, b.low, a.high, a.low )
6900 : lt128( a.high, a.low, b.high, b.low );
6901
6902}
6903
67b7861d
AJ
6904/*----------------------------------------------------------------------------
6905| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6906| be compared, and 0 otherwise. The invalid exception is raised if either
6907| operand is a NaN. The comparison is performed according to the IEC/IEEE
6908| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6909*----------------------------------------------------------------------------*/
6910
e5a41ffa 6911int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6912{
6913 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6914 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6915 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6916 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6917 ) {
ff32e16e 6918 float_raise(float_flag_invalid, status);
67b7861d
AJ
6919 return 1;
6920 }
6921 return 0;
6922}
6923
158142c2
FB
6924/*----------------------------------------------------------------------------
6925| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
6926| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6927| exception. The comparison is performed according to the IEC/IEEE Standard
6928| for Binary Floating-Point Arithmetic.
158142c2
FB
6929*----------------------------------------------------------------------------*/
6930
e5a41ffa 6931int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6932{
6933
6934 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6935 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6936 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6937 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6938 ) {
af39bc8c
AM
6939 if (float128_is_signaling_nan(a, status)
6940 || float128_is_signaling_nan(b, status)) {
ff32e16e 6941 float_raise(float_flag_invalid, status);
b689362d 6942 }
158142c2
FB
6943 return 0;
6944 }
6945 return
6946 ( a.low == b.low )
6947 && ( ( a.high == b.high )
6948 || ( ( a.low == 0 )
bb98fe42 6949 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6950 );
6951
6952}
6953
6954/*----------------------------------------------------------------------------
6955| Returns 1 if the quadruple-precision floating-point value `a' is less than
6956| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6957| cause an exception. Otherwise, the comparison is performed according to the
6958| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6959*----------------------------------------------------------------------------*/
6960
e5a41ffa 6961int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6962{
6963 flag aSign, bSign;
6964
6965 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6966 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6967 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6968 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6969 ) {
af39bc8c
AM
6970 if (float128_is_signaling_nan(a, status)
6971 || float128_is_signaling_nan(b, status)) {
ff32e16e 6972 float_raise(float_flag_invalid, status);
158142c2
FB
6973 }
6974 return 0;
6975 }
6976 aSign = extractFloat128Sign( a );
6977 bSign = extractFloat128Sign( b );
6978 if ( aSign != bSign ) {
6979 return
6980 aSign
bb98fe42 6981 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6982 == 0 );
6983 }
6984 return
6985 aSign ? le128( b.high, b.low, a.high, a.low )
6986 : le128( a.high, a.low, b.high, b.low );
6987
6988}
6989
6990/*----------------------------------------------------------------------------
6991| Returns 1 if the quadruple-precision floating-point value `a' is less than
6992| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6993| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6994| Standard for Binary Floating-Point Arithmetic.
6995*----------------------------------------------------------------------------*/
6996
e5a41ffa 6997int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6998{
6999 flag aSign, bSign;
7000
7001 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7002 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7003 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7004 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7005 ) {
af39bc8c
AM
7006 if (float128_is_signaling_nan(a, status)
7007 || float128_is_signaling_nan(b, status)) {
ff32e16e 7008 float_raise(float_flag_invalid, status);
158142c2
FB
7009 }
7010 return 0;
7011 }
7012 aSign = extractFloat128Sign( a );
7013 bSign = extractFloat128Sign( b );
7014 if ( aSign != bSign ) {
7015 return
7016 aSign
bb98fe42 7017 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7018 != 0 );
7019 }
7020 return
7021 aSign ? lt128( b.high, b.low, a.high, a.low )
7022 : lt128( a.high, a.low, b.high, b.low );
7023
7024}
7025
67b7861d
AJ
7026/*----------------------------------------------------------------------------
7027| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7028| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7029| comparison is performed according to the IEC/IEEE Standard for Binary
7030| Floating-Point Arithmetic.
7031*----------------------------------------------------------------------------*/
7032
e5a41ffa 7033int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7034{
7035 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7036 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7037 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7038 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7039 ) {
af39bc8c
AM
7040 if (float128_is_signaling_nan(a, status)
7041 || float128_is_signaling_nan(b, status)) {
ff32e16e 7042 float_raise(float_flag_invalid, status);
67b7861d
AJ
7043 }
7044 return 1;
7045 }
7046 return 0;
7047}
7048
e5a41ffa
PM
7049static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7050 int is_quiet, float_status *status)
f6714d36
AJ
7051{
7052 flag aSign, bSign;
7053
d1eb8f2a
AD
7054 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7055 float_raise(float_flag_invalid, status);
7056 return float_relation_unordered;
7057 }
f6714d36
AJ
7058 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7059 ( extractFloatx80Frac( a )<<1 ) ) ||
7060 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7061 ( extractFloatx80Frac( b )<<1 ) )) {
7062 if (!is_quiet ||
af39bc8c
AM
7063 floatx80_is_signaling_nan(a, status) ||
7064 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7065 float_raise(float_flag_invalid, status);
f6714d36
AJ
7066 }
7067 return float_relation_unordered;
7068 }
7069 aSign = extractFloatx80Sign( a );
7070 bSign = extractFloatx80Sign( b );
7071 if ( aSign != bSign ) {
7072
7073 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7074 ( ( a.low | b.low ) == 0 ) ) {
7075 /* zero case */
7076 return float_relation_equal;
7077 } else {
7078 return 1 - (2 * aSign);
7079 }
7080 } else {
7081 if (a.low == b.low && a.high == b.high) {
7082 return float_relation_equal;
7083 } else {
7084 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7085 }
7086 }
7087}
7088
e5a41ffa 7089int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7090{
ff32e16e 7091 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7092}
7093
e5a41ffa 7094int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7095{
ff32e16e 7096 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7097}
7098
e5a41ffa
PM
7099static inline int float128_compare_internal(float128 a, float128 b,
7100 int is_quiet, float_status *status)
1f587329
BS
7101{
7102 flag aSign, bSign;
7103
7104 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7105 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7106 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7107 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7108 if (!is_quiet ||
af39bc8c
AM
7109 float128_is_signaling_nan(a, status) ||
7110 float128_is_signaling_nan(b, status)) {
ff32e16e 7111 float_raise(float_flag_invalid, status);
1f587329
BS
7112 }
7113 return float_relation_unordered;
7114 }
7115 aSign = extractFloat128Sign( a );
7116 bSign = extractFloat128Sign( b );
7117 if ( aSign != bSign ) {
7118 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7119 /* zero case */
7120 return float_relation_equal;
7121 } else {
7122 return 1 - (2 * aSign);
7123 }
7124 } else {
7125 if (a.low == b.low && a.high == b.high) {
7126 return float_relation_equal;
7127 } else {
7128 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7129 }
7130 }
7131}
7132
e5a41ffa 7133int float128_compare(float128 a, float128 b, float_status *status)
1f587329 7134{
ff32e16e 7135 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7136}
7137
e5a41ffa 7138int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 7139{
ff32e16e 7140 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7141}
7142
e5a41ffa 7143floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
7144{
7145 flag aSign;
326b9e98 7146 int32_t aExp;
bb98fe42 7147 uint64_t aSig;
9ee6e8bb 7148
d1eb8f2a
AD
7149 if (floatx80_invalid_encoding(a)) {
7150 float_raise(float_flag_invalid, status);
7151 return floatx80_default_nan(status);
7152 }
9ee6e8bb
PB
7153 aSig = extractFloatx80Frac( a );
7154 aExp = extractFloatx80Exp( a );
7155 aSign = extractFloatx80Sign( a );
7156
326b9e98
AJ
7157 if ( aExp == 0x7FFF ) {
7158 if ( aSig<<1 ) {
ff32e16e 7159 return propagateFloatx80NaN(a, a, status);
326b9e98 7160 }
9ee6e8bb
PB
7161 return a;
7162 }
326b9e98 7163
3c85c37f
PM
7164 if (aExp == 0) {
7165 if (aSig == 0) {
7166 return a;
7167 }
7168 aExp++;
7169 }
69397542 7170
326b9e98
AJ
7171 if (n > 0x10000) {
7172 n = 0x10000;
7173 } else if (n < -0x10000) {
7174 n = -0x10000;
7175 }
7176
9ee6e8bb 7177 aExp += n;
a2f2d288
PM
7178 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7179 aSign, aExp, aSig, 0, status);
9ee6e8bb 7180}
9ee6e8bb 7181
e5a41ffa 7182float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
7183{
7184 flag aSign;
326b9e98 7185 int32_t aExp;
bb98fe42 7186 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7187
7188 aSig1 = extractFloat128Frac1( a );
7189 aSig0 = extractFloat128Frac0( a );
7190 aExp = extractFloat128Exp( a );
7191 aSign = extractFloat128Sign( a );
7192 if ( aExp == 0x7FFF ) {
326b9e98 7193 if ( aSig0 | aSig1 ) {
ff32e16e 7194 return propagateFloat128NaN(a, a, status);
326b9e98 7195 }
9ee6e8bb
PB
7196 return a;
7197 }
3c85c37f 7198 if (aExp != 0) {
69397542 7199 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 7200 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7201 return a;
3c85c37f
PM
7202 } else {
7203 aExp++;
7204 }
69397542 7205
326b9e98
AJ
7206 if (n > 0x10000) {
7207 n = 0x10000;
7208 } else if (n < -0x10000) {
7209 n = -0x10000;
7210 }
7211
69397542
PB
7212 aExp += n - 1;
7213 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7214 , status);
9ee6e8bb
PB
7215
7216}