From e05f55c77c4a330283778cfea042b6b18312e23d Mon Sep 17 00:00:00 2001 From: Andreas Arnez Date: Wed, 30 Nov 2022 19:15:53 +0100 Subject: [PATCH] s390: Fix VFMA, VFMS, VFNMA, and VFNMS for 128-bit FP When trying to execute any of the vector float multiply-and-add family instructions, Valgrind panics with the message "Iex_Qop with F128 data". So far all F128 operations were implemented with instructions that operate on FP register pairs. However, such instructions don't exist for Iop_MAddF128 and Iop_MSubF128, so they must be implemented with vector instructions instead. This was missed when emitting them in "guest_s390_toIR.c". Add the missing support. This also involves adding a few new features to host_s390_defs: * a new vector operation S390_VEC_INIT_FROM_FPRS * the capability to move the left half of a VR to an FPR * S390_VEC_FLOAT_MADD/_MSUB for 128-bit sizes --- VEX/priv/host_s390_defs.c | 15 ++++++++++--- VEX/priv/host_s390_defs.h | 1 + VEX/priv/host_s390_isel.c | 46 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 239d9d2997..ac5ca6cbe8 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -8356,6 +8356,7 @@ s390_insn_as_string(const s390_insn *insn) case S390_VEC_PWSUM_DW: op = "v-vpwsumdw"; break; case S390_VEC_PWSUM_QW: op = "v-vpwsumqw"; break; case S390_VEC_INIT_FROM_GPRS: op = "v-vinitfromgprs"; break; + case S390_VEC_INIT_FROM_FPRS: op = "v-vinitfromfprs"; break; case S390_VEC_FLOAT_ADD: op = "v-vfloatadd"; break; case S390_VEC_FLOAT_SUB: op = "v-vfloatsub"; break; case S390_VEC_FLOAT_MUL: op = "v-vfloatmul"; break; @@ -8772,6 +8773,9 @@ s390_insn_move_emit(UChar *buf, const s390_insn *insn) return s390_emit_LGDRw(buf, dst, src); } } + if (dst_class == HRcFlt64 && src_class == HRcVec128) { + return s390_emit_VLR(buf, dst, src); + } /* A move between floating point registers and general purpose registers of different size should never occur and indicates an error elsewhere. */ @@ -11692,6 +11696,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn) return s390_emit_VSUMQ(buf, v1, v2, v3, s390_getM_from_size(size)); case S390_VEC_INIT_FROM_GPRS: return s390_emit_VLVGP(buf, v1, v2, v3); + case S390_VEC_INIT_FROM_FPRS: + return s390_emit_VMRH(buf, v1, v2, v3, 3); case S390_VEC_FLOAT_ADD: return s390_emit_VFA(buf, v1, v2, v3, s390_getM_from_size(size), 0); case S390_VEC_FLOAT_SUB: @@ -11722,6 +11728,7 @@ static UChar * s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn) { s390_vec_triop_t tag = insn->variant.vec_triop.tag; + UChar size = insn->size; UChar v1 = hregNumber(insn->variant.vec_triop.dst); UChar v2 = hregNumber(insn->variant.vec_triop.op1); UChar v3 = hregNumber(insn->variant.vec_triop.op2); @@ -11729,13 +11736,15 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn) switch (tag) { case S390_VEC_PERM: { - vassert(insn->size == 16); + vassert(size == 16); return s390_emit_VPERM(buf, v1, v2, v3, v4); } case S390_VEC_FLOAT_MADD: - return s390_emit_VFMA(buf, v1, v2, v3, v4, 0, 3); + return s390_emit_VFMA(buf, v1, v2, v3, v4, 0, + s390_getM_from_size(size)); case S390_VEC_FLOAT_MSUB: - return s390_emit_VFMS(buf, v1, v2, v3, v4, 0, 3); + return s390_emit_VFMS(buf, v1, v2, v3, v4, 0, + s390_getM_from_size(size)); default: goto fail; } diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index dc116106ef..375cc8402a 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -402,6 +402,7 @@ typedef enum { S390_VEC_PWSUM_QW, S390_VEC_INIT_FROM_GPRS, + S390_VEC_INIT_FROM_FPRS, S390_VEC_FLOAT_ADD, S390_VEC_FLOAT_SUB, S390_VEC_FLOAT_MUL, diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c index 32c209edb5..3ae7c07c07 100644 --- a/VEX/priv/host_s390_isel.c +++ b/VEX/priv/host_s390_isel.c @@ -2153,8 +2153,47 @@ s390_isel_float128_expr_wrk(HReg *dst_hi, HReg *dst_lo, ISelEnv *env, vpanic("Iex_Get with F128 data"); /* --------- 4-ary OP --------- */ - case Iex_Qop: - vpanic("Iex_Qop with F128 data"); + case Iex_Qop: { + IRQop *qop = expr->Iex.Qop.details; + s390_vec_triop_t vecop; + HReg op1_hi, op1_lo, op2_hi, op2_lo, op3_hi, op3_lo; + HReg dst, dstv_lo, op1, op2, op3; + + s390_isel_float128_expr(&op1_hi, &op1_lo, env, qop->arg2); + s390_isel_float128_expr(&op2_hi, &op2_lo, env, qop->arg3); + s390_isel_float128_expr(&op3_hi, &op3_lo, env, qop->arg4); + + /* Cannot carry out with FPRs; move operands to VRs instead. */ + op1 = newVRegV(env); + op2 = newVRegV(env); + op3 = newVRegV(env); + dst = newVRegV(env); + dstv_lo = newVRegV(env); + addInstr(env, s390_insn_vec_binop(8, S390_VEC_INIT_FROM_FPRS, + op1, op1_hi, op1_lo)); + addInstr(env, s390_insn_vec_binop(8, S390_VEC_INIT_FROM_FPRS, + op2, op2_hi, op2_lo)); + addInstr(env, s390_insn_vec_binop(8, S390_VEC_INIT_FROM_FPRS, + op3, op3_hi, op3_lo)); + + switch (qop->op) { + case Iop_MAddF128: vecop = S390_VEC_FLOAT_MADD; break; + case Iop_MSubF128: vecop = S390_VEC_FLOAT_MSUB; break; + default: + goto irreducible; + } + + set_bfp_rounding_mode_in_fpc(env, qop->arg1); + addInstr(env, s390_insn_vec_triop(16, vecop, dst, op1, op2, op3)); + addInstr(env, s390_insn_vec_binop(8, S390_VEC_MERGEL, dstv_lo, dst, dst)); + + /* Move result to destination FPRs. */ + *dst_hi = newVRegF(env); + *dst_lo = newVRegF(env); + addInstr(env, s390_insn_move(8, *dst_hi, dst)); + addInstr(env, s390_insn_move(8, *dst_lo, dstv_lo)); + return; + } /* --------- TERNARY OP --------- */ case Iex_Triop: { @@ -4797,6 +4836,9 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) } } + case Iex_Qop: { + } + /* --------- MULTIPLEX --------- */ case Iex_ITE: { IRExpr *cond_expr = expr->Iex.ITE.cond; -- 2.47.2