DEF_HELPER_FLAGS_4(sme2_bfcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sme2_fcvt_n, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sme2_fcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sme2_fcvt_w, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sme2_fcvtl, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
&zz_n zd zn n
@zz_1x2 ........ ... ..... ...... ..... zd:5 \
&zz_n n=1 zn=%zn_ax2
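+# One source vector; the destination is a pair of consecutive vectors.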
+@zz_2x1 ........ ... ..... ...... zn:5 ..... \
+ &zz_n n=1 zd=%zd_ax2
BFCVT 11000001 011 00000 111000 ....0 ..... @zz_1x2
BFCVTN 11000001 011 00000 111000 ....1 ..... @zz_1x2
FCVT_n 11000001 001 00000 111000 ....0 ..... @zz_1x2
FCVTN 11000001 001 00000 111000 ....1 ..... @zz_1x2
+
+FCVT_w 11000001 101 00000 111000 ..... ....0 @zz_2x1
+FCVTL 11000001 101 00000 111000 ..... ....1 @zz_2x1
#include "vec_internal.h"
#include "sve_ldst_internal.h"
+
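+/* Return true if the nx vectors at x overlap the ny vectors at y. */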
+static bool vectors_overlap(ARMVectorReg *x, unsigned nx,
+                            ARMVectorReg *y, unsigned ny)
+{
+    return !(x + nx <= y || y + ny <= x);
+}
+
void helper_set_svcr(CPUARMState *env, uint32_t val, uint32_t mask)
{
    aarch64_set_svcr(env, val, mask);
        d[H2(i * 2 + 1)] = d1;
    }
}
+
+/* Expand and convert: low/high halves of the source into d0/d1. */
+void HELPER(sme2_fcvt_w)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+    ARMVectorReg scratch;
+    size_t oprsz = simd_oprsz(desc);
+    size_t i, n = oprsz / 4;
+    float16 *s = vs;
+    float32 *d0 = vd;
+    float32 *d1 = vd + sizeof(ARMVectorReg);
+
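+    /* If the destination pair overlaps the source, convert from a copy. */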
+    if (vectors_overlap(vd, 2, vs, 1)) {
+        s = memcpy(&scratch, s, oprsz);
+    }
+
+    for (i = 0; i < n; ++i) {
+        d0[H4(i)] = sve_f16_to_f32(s[H2(i)], fpst);
+    }
+    for (i = 0; i < n; ++i) {
+        d1[H4(i)] = sve_f16_to_f32(s[H2(n + i)], fpst);
+    }
+}
+
+/* Deinterleave and convert: even/odd source elements into d0/d1. */
+void HELPER(sme2_fcvtl)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+    size_t i, n = simd_oprsz(desc) / 4;
+    float16 *s = vs;
+    float32 *d0 = vd;
+    float32 *d1 = vd + sizeof(ARMVectorReg);
+
+    for (i = 0; i < n; ++i) {
+        float32 v0 = sve_f16_to_f32(s[H2(i * 2 + 0)], fpst);
+        float32 v1 = sve_f16_to_f32(s[H2(i * 2 + 1)], fpst);
+        d0[H4(i)] = v0;
+        d1[H4(i)] = v1;
+    }
+}
 * FZ16. When converting from fp16, this affects flushing input denormals;
 * when converting to fp16, this affects flushing output denormals.
 */
-static inline float32 sve_f16_to_f32(float16 f, float_status *fpst)
+float32 sve_f16_to_f32(float16 f, float_status *fpst)
{
    bool save = get_flush_inputs_to_zero(fpst);
    float32 ret;
           FPST_A64, gen_helper_sme2_fcvt_n)
TRANS_FEAT(FCVTN, aa64_sme2, do_zz_fpst, a, 0,
           FPST_A64, gen_helper_sme2_fcvtn)
+
+TRANS_FEAT(FCVT_w, aa64_sme_f16f16, do_zz_fpst, a, 0,
+           FPST_A64_F16, gen_helper_sme2_fcvt_w)
+TRANS_FEAT(FCVTL, aa64_sme_f16f16, do_zz_fpst, a, 0,
+           FPST_A64_F16, gen_helper_sme2_fcvtl)
bfloat16 helper_sme2_ah_fmax_b16(bfloat16 a, bfloat16 b, float_status *fpst);
bfloat16 helper_sme2_ah_fmin_b16(bfloat16 a, bfloat16 b, float_status *fpst);
+float32 sve_f16_to_f32(float16 f, float_status *fpst);
float16 sve_f32_to_f16(float32 f, float_status *fpst);
#endif /* TARGET_ARM_VEC_INTERNAL_H */