* are FRAC_SHIFT bits that may require rounding at the bottom of the
* fraction; these bits will be removed. The exponent will be biased
* by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
+ *
+ * The saturate parameter selects the overflow result for formats that
+ * support saturation: if true, overflow yields max normal, not infinity.
*/
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
- const FloatFmt *fmt)
+ const FloatFmt *fmt, bool saturate)
{
const int exp_max = fmt->exp_max;
const int frac_shift = fmt->frac_shift;
const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
const uint64_t roundeven_mask = round_mask | frac_lsb;
uint64_t inc;
- bool overflow_norm = false;
+ bool overflow_norm = saturate;
int exp, flags = 0;
switch (s->float_rounding_mode) {
break;
case float_round_up:
inc = p->sign ? 0 : round_mask;
- overflow_norm = p->sign;
+ overflow_norm |= p->sign;
break;
case float_round_down:
inc = p->sign ? round_mask : 0;
- overflow_norm = !p->sign;
+ overflow_norm |= !p->sign;
break;
case float_round_to_odd:
overflow_norm = true;
}
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
- const FloatFmt *fmt)
+ const FloatFmt *fmt, bool saturate)
{
if (likely(is_anynorm(p->cls))) {
- parts_uncanon_normal(p, s, fmt);
+ parts_uncanon_normal(p, s, fmt, saturate);
} else {
switch (p->cls) {
case float_class_zero:
PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
- const FloatFmt *fmt);
+ const FloatFmt *fmt, bool saturate);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
- const FloatFmt *fmt);
+ const FloatFmt *fmt, bool saturate);
-#define parts_uncanon_normal(A, S, F) \
- PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
+#define parts_uncanon_normal(A, S, F, X) \
+ PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F, X)
static void parts64_uncanon(FloatParts64 *p, float_status *status,
- const FloatFmt *fmt);
+ const FloatFmt *fmt, bool saturate);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
- const FloatFmt *fmt);
+ const FloatFmt *fmt, bool saturate);
-#define parts_uncanon(A, S, F) \
- PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
+#define parts_uncanon(A, S, F, X) \
+ PARTS_GENERIC_64_128(uncanon, A)(A, S, F, X)
static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
float_status *s,
const FloatFmt *params)
{
- parts_uncanon(p, s, params);
+ parts_uncanon(p, s, params, false);
return float16_pack_raw(p);
}
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
float_status *s)
{
- parts_uncanon(p, s, &bfloat16_params);
+ parts_uncanon(p, s, &bfloat16_params, false);
return bfloat16_pack_raw(p);
}
static float32 float32_round_pack_canonical(FloatParts64 *p,
float_status *s)
{
- parts_uncanon(p, s, &float32_params);
+ parts_uncanon(p, s, &float32_params, false);
return float32_pack_raw(p);
}
static float64 float64_round_pack_canonical(FloatParts64 *p,
float_status *s)
{
- parts_uncanon(p, s, &float64_params);
+ parts_uncanon(p, s, &float64_params, false);
return float64_pack_raw(p);
}
static float64 float64r32_round_pack_canonical(FloatParts64 *p,
float_status *s)
{
- parts_uncanon(p, s, &float32_params);
+ parts_uncanon(p, s, &float32_params, false);
return float64r32_pack_raw(p);
}
static float128 float128_round_pack_canonical(FloatParts128 *p,
float_status *s)
{
- parts_uncanon(p, s, &float128_params);
+ parts_uncanon(p, s, &float128_params, false);
return float128_pack_raw(p);
}
case float_class_normal:
case float_class_denormal:
if (s->floatx80_rounding_precision == floatx80_precision_x) {
- parts_uncanon_normal(p, s, fmt);
+ parts_uncanon_normal(p, s, fmt, false);
frac = p->frac_hi;
exp = p->exp;
} else {
p64.sign = p->sign;
p64.exp = p->exp;
frac_truncjam(&p64, p);
- parts_uncanon_normal(&p64, s, fmt);
+ parts_uncanon_normal(&p64, s, fmt, false);
frac = p64.frac;
exp = p64.exp;
}
pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
/* Round before applying negate result. */
- parts_uncanon(pr, status, &float16_params);
+ parts_uncanon(pr, status, &float16_params, false);
if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
pr->sign ^= 1;
}
pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
/* Round before applying negate result. */
- parts_uncanon(pr, status, &float32_params);
+ parts_uncanon(pr, status, &float32_params, false);
if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
pr->sign ^= 1;
}
pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
/* Round before applying negate result. */
- parts_uncanon(pr, status, &float64_params);
+ parts_uncanon(pr, status, &float64_params, false);
if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
pr->sign ^= 1;
}
pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
/* Round before applying negate result. */
- parts_uncanon(pr, status, &float32_params);
+ parts_uncanon(pr, status, &float32_params, false);
if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
pr->sign ^= 1;
}
pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
/* Round before applying negate result. */
- parts_uncanon(pr, status, &bfloat16_params);
+ parts_uncanon(pr, status, &bfloat16_params, false);
if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
pr->sign ^= 1;
}
pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
/* Round before applying negate result. */
- parts_uncanon(pr, status, &float128_params);
+ parts_uncanon(pr, status, &float128_params, false);
if ((flags & float_muladd_negate_result) && !is_nan(pr->cls)) {
pr->sign ^= 1;
}
/* Round remainder to the target format */
*r = *r_precise;
status->float_exception_flags = 0;
- parts_uncanon(r, status, fmt);
+ parts_uncanon(r, status, fmt, false);
r_flags = status->float_exception_flags;
r->frac &= (1ULL << fmt->frac_size) - 1;
parts_canonicalize(r, status, fmt);