.frac_shift = (-F - 1) & 63, \
.round_mask = (1ull << ((-F - 1) & 63)) - 1
+static const FloatFmt float8_e5m2_params = {
+ FLOAT_PARAMS(5, 2) /* Format table for float8_e5m2; its address is handed to
+                     * unpack_raw64(), pack_raw64(), parts_canonicalize() and
+                     * parts_uncanon() by the float8_e5m2 helpers.  The
+                     * arguments are presumably (exponent bits, fraction
+                     * bits), matching the E5M2 name -- confirm against the
+                     * FLOAT_PARAMS definition. */
+};
+
/*
 * Format table for float16.  Its address is passed to unpack_raw64() and
 * pack_raw64() by the float16 raw helpers, and it is the table selected
 * when float16_to_float32() is called with ieee set.  The arguments are
 * presumably (exponent bits, fraction bits) -- confirm against the
 * FLOAT_PARAMS definition.
 */
static const FloatFmt float16_params = {
FLOAT_PARAMS(5, 10)
};
};
}
+static void QEMU_FLATTEN float8_e5m2_unpack_raw(FloatParts64 *p, float8_e5m2 f)
+{ /* Thin wrapper: forwards the encoding f and the float8_e5m2 format table
+   * to unpack_raw64(), which writes its result into *p.  Canonicalization
+   * is a separate step (float8_e5m2_unpack_canonical calls
+   * parts_canonicalize() after this function). */
+ unpack_raw64(p, &float8_e5m2_params, f);
+}
+
static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
{
unpack_raw64(p, &float16_params, f);
return ret;
}
+/*
+ * Return what pack_raw64() produces for *p when given the float8_e5m2
+ * format table, converted to the float8_e5m2 return type.
+ */
+static float8_e5m2 QEMU_FLATTEN float8_e5m2_pack_raw(const FloatParts64 *p)
+{
+    float8_e5m2 encoded = pack_raw64(p, &float8_e5m2_params);
+
+    return encoded;
+}
+
static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
{
return make_float16(pack_raw64(p, &float16_params));
* Pack/unpack routines with a specific FloatFmt.
*/
+static void float8_e5m2_unpack_canonical(FloatParts64 *p, float8_e5m2 f,
+ float_status *s)
+{ /* Two ordered steps: first the raw unpack of f into *p, then
+   * parts_canonicalize() on *p with the float8_e5m2 format table.  The
+   * status pointer s is only forwarded to parts_canonicalize(). */
+ float8_e5m2_unpack_raw(p, f);
+ parts_canonicalize(p, s, &float8_e5m2_params);
+}
+
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
float_status *s, const FloatFmt *params)
{
parts_canonicalize(p, s, &bfloat16_params);
}
+/*
+ * Finish a conversion to float8_e5m2: run parts_uncanon() on *p with the
+ * float8_e5m2 format table, forwarding @s and @saturate unchanged, then
+ * hand the updated parts to float8_e5m2_pack_raw() and return its result.
+ */
+static float8_e5m2 float8_e5m2_round_pack_canonical(FloatParts64 *p,
+                                                    float_status *s,
+                                                    bool saturate)
+{
+    float8_e5m2 packed;
+
+    parts_uncanon(p, s, &float8_e5m2_params, saturate);
+    packed = float8_e5m2_pack_raw(p);
+    return packed;
+}
+
static float16 float16a_round_pack_canonical(FloatParts64 *p,
float_status *s,
const FloatFmt *params)
}
}
+/*
+ * Adjust the already-unpacked parts in *a before they are rounded and
+ * packed as float8_e5m2 (the callers run float8_e5m2_round_pack_canonical
+ * right after this).  Dispatches on a->cls:
+ *   - zero / normal:  left untouched;
+ *   - denormal:       raises float_flag_input_denormal_used;
+ *   - inf:            rewritten to a normal value only when @saturate;
+ *   - qnan / snan:    delegated to parts_return_nan().
+ * Any other class value trips g_assert_not_reached().
+ */
+static void parts_float_to_e5m2(FloatParts64 *a, float_status *s, bool saturate)
+{
+    switch (a->cls) {
+    case float_class_zero:
+    case float_class_normal:
+        return;
+
+    case float_class_denormal:
+        float_raise(float_flag_input_denormal_used, s);
+        return;
+
+    case float_class_inf:
+        if (!saturate) {
+            /* Without saturation the infinity is passed on as is. */
+            return;
+        }
+        /*
+         * Per OCP, conversion in SATURATE mode bounds Inf to MAX.
+         * The fraction mask covers frac_size + 1 bits starting at
+         * frac_shift.
+         * NOTE(review): exp is assigned exp_max - 1 directly; confirm this
+         * is the intended scale for FloatParts64.exp as consumed by
+         * parts_uncanon().
+         */
+        a->cls = float_class_normal;
+        a->exp = float8_e5m2_params.exp_max - 1;
+        a->frac = MAKE_64BIT_MASK(float8_e5m2_params.frac_shift,
+                                  float8_e5m2_params.frac_size + 1);
+        return;
+
+    case float_class_qnan:
+    case float_class_snan:
+        parts_return_nan(a, s);
+        return;
+
+    default:
+        g_assert_not_reached();
+    }
+}
+
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
if (is_nan(a->cls)) {
}
}
+/*
+ * Convert a float8_e5m2 value to bfloat16.
+ *
+ * @a: the float8_e5m2 input.
+ * @s: status block forwarded to every step.
+ *
+ * The input is unpacked into canonical parts, passed through
+ * parts_float_to_float(), then rounded and packed as bfloat16.
+ */
+bfloat16 float8_e5m2_to_bfloat16(float8_e5m2 a, float_status *s)
+{
+    FloatParts64 parts;
+    bfloat16 result;
+
+    float8_e5m2_unpack_canonical(&parts, a, s);
+    parts_float_to_float(&parts, s);
+    result = bfloat16_round_pack_canonical(&parts, s);
+    return result;
+}
+
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
return float64_round_pack_canonical(&p, s);
}
+/*
+ * Convert a float32 value to float8_e5m2.
+ *
+ * @a:        the float32 input.
+ * @saturate: forwarded to parts_float_to_e5m2() (where it bounds an
+ *            infinite input to a finite value) and to the final
+ *            round-and-pack step.
+ * @s:        status block forwarded to every step.
+ */
+float8_e5m2 float32_to_float8_e5m2(float32 a, bool saturate, float_status *s)
+{
+    FloatParts64 parts;
+    float8_e5m2 result;
+
+    float32_unpack_canonical(&parts, a, s);
+    parts_float_to_e5m2(&parts, s, saturate);
+    result = float8_e5m2_round_pack_canonical(&parts, s, saturate);
+    return result;
+}
+
float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
FloatParts64 p;
return float32_round_pack_canonical(&p, s);
}
+/*
+ * Convert a bfloat16 value to float8_e5m2.
+ *
+ * @a:        the bfloat16 input.
+ * @saturate: forwarded to parts_float_to_e5m2() (where it bounds an
+ *            infinite input to a finite value) and to the final
+ *            round-and-pack step.
+ * @s:        status block forwarded to every step.
+ */
+float8_e5m2 bfloat16_to_float8_e5m2(bfloat16 a, bool saturate, float_status *s)
+{
+    FloatParts64 parts;
+    float8_e5m2 result;
+
+    bfloat16_unpack_canonical(&parts, a, s);
+    parts_float_to_e5m2(&parts, s, saturate);
+    result = float8_e5m2_round_pack_canonical(&parts, s, saturate);
+    return result;
+}
+
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
FloatParts64 p;
float128 uint64_to_float128(uint64_t, float_status *status);
float128 uint128_to_float128(Int128, float_status *status);
+/*----------------------------------------------------------------------------
+| OCP FP8 conversion routines.
+|
+| float8_e5m2_to_bfloat16 converts an E5M2 value to bfloat16.  The two
+| *_to_float8_e5m2 routines convert to E5M2; when `sat' is true an infinite
+| input is bounded to a finite maximum instead of being passed on as
+| infinity, and `sat' is also forwarded to the final round-and-pack step.
+| `status' is the float_status block handed to every internal step.
+*----------------------------------------------------------------------------*/
+
+bfloat16 float8_e5m2_to_bfloat16(float8_e5m2, float_status *status);
+float8_e5m2 bfloat16_to_float8_e5m2(bfloat16, bool sat, float_status *status);
+float8_e5m2 float32_to_float8_e5m2(float32, bool sat, float_status *status);
+
/*----------------------------------------------------------------------------
| Software half-precision conversion routines.
*----------------------------------------------------------------------------*/