From 600a0099a1eb2335a3f9563534c112e11817002b Mon Sep 17 00:00:00 2001 From: Vadim Barkov Date: Fri, 5 Oct 2018 13:51:49 +0300 Subject: [PATCH] Bug 385411 s390x: Add z13 vector floating point support This adds support for the z/Architecture vector FP instructions that were introduced with z13. The patch was contributed by Vadim Barkov, with some clean-up and minor adjustments by Andreas Arnez. --- NEWS | 1 + VEX/priv/guest_s390_defs.h | 10 +- VEX/priv/guest_s390_helpers.c | 47 ++ VEX/priv/guest_s390_toIR.c | 875 ++++++++++++++++++++++++++++++++-- VEX/priv/host_s390_defs.c | 240 +++++++++- VEX/priv/host_s390_defs.h | 16 +- VEX/priv/host_s390_isel.c | 82 +++- 7 files changed, 1231 insertions(+), 40 deletions(-) diff --git a/NEWS b/NEWS index bfa7162dd7..ffaabd7d76 100644 --- a/NEWS +++ b/NEWS @@ -58,6 +58,7 @@ where XXXXXX is the bug number as listed below. 397187 z13 vector register support for vgdb gdbserver 401277 More bugs in z13 support 401112 LLVM 5.0 generates comparison against partially initialized data +385411 s390x: z13 vector floating-point instructions not implemented Release 3.14.0 (9 October 2018) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h index 3bfecbe316..d72cc9f6db 100644 --- a/VEX/priv/guest_s390_defs.h +++ b/VEX/priv/guest_s390_defs.h @@ -281,7 +281,11 @@ enum { S390_VEC_OP_VMALH = 13, S390_VEC_OP_VCH = 14, S390_VEC_OP_VCHL = 15, - S390_VEC_OP_LAST = 16 // supposed to be the last element in enum + S390_VEC_OP_VFCE = 16, + S390_VEC_OP_VFCH = 17, + S390_VEC_OP_VFCHE = 18, + S390_VEC_OP_VFTCI = 19, + S390_VEC_OP_LAST = 20 // supposed to be the last element in enum } s390x_vec_op_t; /* Arguments of s390x_dirtyhelper_vec_op(...) which are packed into one @@ -300,8 +304,10 @@ typedef union { unsigned int m4 : 4; // field m4 of insn or zero if it's missing unsigned int m5 : 4; // field m5 of insn or zero if it's missing + unsigned int m6 : 4; // field m6 of insn or zero if it's missing + unsigned int i3 : 12; // field i3 of insn or zero if it's missing unsigned int read_only: 1; // don't write result to Guest State - unsigned int reserved : 27; // reserved for future + unsigned int reserved : 11; // reserved for future }; ULong serialized; } s390x_vec_op_details_t; diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c index d9773e73e3..5877743c91 100644 --- a/VEX/priv/guest_s390_helpers.c +++ b/VEX/priv/guest_s390_helpers.c @@ -2498,6 +2498,10 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, {0xe7, 0xa9}, /* VMALH */ {0xe7, 0xfb}, /* VCH */ {0xe7, 0xf9}, /* VCHL */ + {0xe7, 0xe8}, /* VFCE */ + {0xe7, 0xeb}, /* VFCH */ + {0xe7, 0xea}, /* VFCHE */ + {0xe7, 0x4a} /* VFTCI */ }; union { @@ -2525,6 +2529,28 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, unsigned int rxb : 4; unsigned int op2 : 8; } VRRd; + struct { + UInt op1 : 8; + UInt v1 : 4; + UInt v2 : 4; + UInt v3 : 4; + UInt : 4; + UInt m6 : 4; + UInt m5 : 4; + UInt m4 : 4; + UInt rxb : 4; + UInt op2 : 8; + } VRRc; + struct { + UInt op1 : 8; + UInt v1 : 4; + UInt v2 : 4; + UInt i3 : 12; + UInt m5 : 4; + UInt m4 : 4; + UInt rxb : 4; + UInt op2 : 8; + } VRIe; UChar bytes[6]; } the_insn; @@ -2578,6 +2604,27 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, the_insn.VRRd.m6 = d->m5; break; + case S390_VEC_OP_VFCE: + case S390_VEC_OP_VFCH: + case S390_VEC_OP_VFCHE: + the_insn.VRRc.v1 = 1; + the_insn.VRRc.v2 = 2; + the_insn.VRRc.v3 = 3; + the_insn.VRRc.rxb = 0b1110; + the_insn.VRRc.m4 = d->m4; + the_insn.VRRc.m5 = 
d->m5; + the_insn.VRRc.m6 = d->m6; + break; + + case S390_VEC_OP_VFTCI: + the_insn.VRIe.v1 = 1; + the_insn.VRIe.v2 = 2; + the_insn.VRIe.rxb = 0b1100; + the_insn.VRIe.i3 = d->i3; + the_insn.VRIe.m4 = d->m4; + the_insn.VRIe.m5 = d->m5; + break; + default: vex_printf("operation = %d\n", d->op); vpanic("s390x_dirtyhelper_vec_op: unknown operation"); diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 50a5a41774..1c4ac390aa 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -86,6 +86,7 @@ typedef enum { S390_DECODE_UNKNOWN_INSN, S390_DECODE_UNIMPLEMENTED_INSN, S390_DECODE_UNKNOWN_SPECIAL_INSN, + S390_DECODE_SPECIFICATION_EXCEPTION, S390_DECODE_ERROR } s390_decode_t; @@ -421,6 +422,26 @@ yield_if(IRExpr *condition) S390X_GUEST_OFFSET(guest_IA))); } +/* Convenience macro to yield a specification exception if the given condition + is not met. Used to pass this type of decoding error up through the call + chain. */ +#define s390_insn_assert(mnm, cond) \ + do { \ + if (!(cond)) { \ + dis_res->whatNext = Dis_StopHere; \ + dis_res->jk_StopHere = Ijk_NoDecode; \ + return (mnm); \ + } \ + } while (0) + +/* Convenience function to check for a specification exception. */ +static Bool +is_specification_exception(void) +{ + return (dis_res->whatNext == Dis_StopHere && + dis_res->jk_StopHere == Ijk_NoDecode); +} + static __inline__ IRExpr *get_fpr_dw0(UInt); static __inline__ void put_fpr_dw0(UInt, IRExpr *); static __inline__ IRExpr *get_dpr_dw0(UInt); @@ -1770,6 +1791,11 @@ s390_vr_get_type(const UChar m) /* Determine if Zero Search (ZS) flag is set in m field */ #define s390_vr_is_zs_set(m) (((m) & 0b0010) != 0) +/* Check if the "Single-Element-Control" bit is set. + Used in vector FP instructions. + */ +#define s390_vr_is_single_element_control_set(m) (((m) & 0x8) != 0) + /* Generates arg1 < arg2 (or arg1 <= arg2 if allow_equal == True) expression. Arguments must have V128 type and are treated as unsigned 128-bit numbers. 
*/ @@ -2001,12 +2027,14 @@ s390_vr_offset_by_index(UInt archreg,IRType type, UChar index) return vr_offset(archreg) + sizeof(UShort) * index; case Ity_I32: + case Ity_F32: if(index > 3) { goto invalidIndex; } return vr_offset(archreg) + sizeof(UInt) * index; case Ity_I64: + case Ity_F64: if(index > 1) { goto invalidIndex; } @@ -2237,8 +2265,8 @@ encode_bfp_rounding_mode(UChar mode) case S390_BFP_ROUND_PER_FPC: rm = get_bfp_rounding_mode_from_fpc(); break; - case S390_BFP_ROUND_NEAREST_AWAY: /* not supported */ - case S390_BFP_ROUND_PREPARE_SHORT: /* not supported */ + case S390_BFP_ROUND_NEAREST_AWAY: rm = mkU32(Irrm_NEAREST_TIE_AWAY_0); break; + case S390_BFP_ROUND_PREPARE_SHORT: rm = mkU32(Irrm_PREPARE_SHORTER); break; case S390_BFP_ROUND_NEAREST_EVEN: rm = mkU32(Irrm_NEAREST); break; case S390_BFP_ROUND_ZERO: rm = mkU32(Irrm_ZERO); break; case S390_BFP_ROUND_POSINF: rm = mkU32(Irrm_PosINF); break; @@ -3524,6 +3552,26 @@ s390_format_VRI_VVIM(const HChar *(*irgen)(UChar v1, UChar v3, UShort i2, UChar s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), mnm, v1, v3, i2, m4); } +static void +s390_format_VRI_VVIMM(const HChar *(*irgen)(UChar v1, UChar v2, UShort i3, + UChar m4, UChar m5), + UChar v1, UChar v2, UShort i3, UChar m4, UChar m5, + UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, i3, m4, m5); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), mnm, v1, v2, i3, m4, m5); +} static void s390_format_VRS_RRDVM(const HChar *(*irgen)(UChar r1, IRTemp op2addr, UChar v3, @@ -3680,7 +3728,7 @@ s390_format_VRV_VVRDMT(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar m3) static void -s390_format_VRRd_VVVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, +s390_format_VRR_VVVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6), UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6, UChar rxb) @@ -3794,6 +3842,92 @@ s390_format_VRRd_VVVVM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, } +static void +s390_format_VRRa_VVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar m3, + UChar m4, UChar m5), + UChar v1, UChar v2, UChar m3, UChar m4, UChar m5, + UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, m3, m4, m5); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), mnm, v1, v2, m3, m4, m5); +} + +static void +s390_format_VRRa_VVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, + UChar m4, UChar m5), + UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + mnm = irgen(v1, v2, v3, m4, m5); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), mnm, v1, v2, v3, m4, m5); +} + +static void +s390_format_VRRa_VVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar m3, + UChar m4), + UChar v1, UChar v2, UChar m3, UChar m4, UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = 
s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, m3, m4); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), mnm, v1, v2, m3, m4); +} + +static void +s390_format_VRRa_VVVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, + UChar m4, UChar m5, UChar m6), + UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar m6, UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + mnm = irgen(v1, v2, v3, m4, m5, m6); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), + mnm, v1, v2, v3, m4, m5, m6); +} + /*------------------------------------------------------------*/ /*--- Build IR for opcodes ---*/ /*------------------------------------------------------------*/ @@ -17895,6 +18029,575 @@ s390_irgen_VMALH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) return "vmalh"; } +static void +s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, + UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m4); + UChar maxIndex = isSingleElementOp ? 0 : 1; + + /* For Iop_F32toF64 we do this: + f32[0] -> f64[0] + f32[2] -> f64[1] + + For Iop_F64toF32 we do this: + f64[0] -> f32[0] + f64[1] -> f32[2] + + The magic below with scaling factors is used to achieve the logic + described above. + */ + const UChar sourceIndexScaleFactor = (op == Iop_F32toF64) ? 2 : 1; + const UChar destinationIndexScaleFactor = (op == Iop_F64toF32) ? 2 : 1; + + const Bool isUnary = (op == Iop_F32toF64); + for (UChar i = 0; i <= maxIndex; i++) { + IRExpr* argument = get_vr(v2, fromType, i * sourceIndexScaleFactor); + IRExpr* result; + if (!isUnary) { + result = binop(op, + mkexpr(encode_bfp_rounding_mode(m5)), + argument); + } else { + result = unop(op, argument); + } + put_vr(v1, toType, i * destinationIndexScaleFactor, result); + } + + if (isSingleElementOp) { + put_vr_dw1(v1, mkU64(0)); + } +} + +static const HChar * +s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vcdg", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); + + return "vcdg"; +} + +static const HChar * +s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vcdlg", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); + + return "vcdlg"; +} + +static const HChar * +s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vcgd", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); + + return "vcgd"; +} + +static const HChar * +s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vclgd", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + 
s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); + + return "vclgd"; +} + +static const HChar * +s390_irgen_VFI(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vfi", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64, + v1, v2, m3, m4, m5); + + return "vcgld"; +} + +static const HChar * +s390_irgen_VLDE(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vlde", m3 == 2); + + s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, v1, v2, m3, m4, m5); + + return "vlde"; +} + +static const HChar * +s390_irgen_VLED(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vled", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, v1, v2, m3, m4, m5); + + return "vled"; +} + +static const HChar * +s390_irgen_VFPSO(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vfpso", m3 == 3); + + IRExpr* result; + switch (m5) { + case 0: { + /* Invert sign */ + if (!s390_vr_is_single_element_control_set(m4)) { + result = unop(Iop_Neg64Fx2, get_vr_qw(v2)); + } + else { + result = binop(Iop_64HLtoV128, + unop(Iop_ReinterpF64asI64, + unop(Iop_NegF64, get_vr(v2, Ity_F64, 0))), + mkU64(0)); + } + break; + } + + case 1: { + /* Set sign to negative */ + IRExpr* highHalf = mkU64(0x8000000000000000ULL); + if (!s390_vr_is_single_element_control_set(m4)) { + IRExpr* lowHalf = highHalf; + IRExpr* mask = binop(Iop_64HLtoV128, highHalf, lowHalf); + result = binop(Iop_OrV128, get_vr_qw(v2), mask); + } + else { + result = binop(Iop_64HLtoV128, + binop(Iop_Or64, get_vr_dw0(v2), highHalf), + mkU64(0ULL)); + } + + break; + } + + case 2: { + /* Set sign to positive */ + if (!s390_vr_is_single_element_control_set(m4)) { + result = unop(Iop_Abs64Fx2, get_vr_qw(v2)); + } + else { + result = binop(Iop_64HLtoV128, + unop(Iop_ReinterpF64asI64, + unop(Iop_AbsF64, get_vr(v2, Ity_F64, 0))), + mkU64(0)); + } + + break; + } + + default: + vpanic("s390_irgen_VFPSO: Invalid m5 value"); + } + + put_vr_qw(v1, result); + if (s390_vr_is_single_element_control_set(m4)) { + put_vr_dw1(v1, mkU64(0ULL)); + } + + return "vfpso"; +} + +static void s390x_vec_fp_binary_op(IROp generalOp, IROp singleElementOp, + UChar v1, UChar v2, UChar v3, UChar m4, + UChar m5) +{ + IRExpr* result; + if (!s390_vr_is_single_element_control_set(m5)) { + result = triop(generalOp, get_bfp_rounding_mode_from_fpc(), + get_vr_qw(v2), get_vr_qw(v3)); + } else { + IRExpr* highHalf = triop(singleElementOp, + get_bfp_rounding_mode_from_fpc(), + get_vr(v2, Ity_F64, 0), + get_vr(v3, Ity_F64, 0)); + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), + mkU64(0ULL)); + } + + put_vr_qw(v1, result); +} + +static void s390x_vec_fp_unary_op(IROp generalOp, IROp singleElementOp, + UChar v1, UChar v2, UChar m3, UChar m4) +{ + IRExpr* result; + if (!s390_vr_is_single_element_control_set(m4)) { + result = binop(generalOp, get_bfp_rounding_mode_from_fpc(), + get_vr_qw(v2)); + } + else { + IRExpr* highHalf = binop(singleElementOp, + get_bfp_rounding_mode_from_fpc(), + get_vr(v2, Ity_F64, 0)); + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), + mkU64(0ULL)); + } + + put_vr_qw(v1, result); +} + + +static void +s390_vector_fp_mulAddOrSub(IROp singleElementOp, + 
UChar v1, UChar v2, UChar v3, UChar v4, + UChar m5, UChar m6) +{ + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + IRTemp irrm_temp = newTemp(Ity_I32); + assign(irrm_temp, get_bfp_rounding_mode_from_fpc()); + IRExpr* irrm = mkexpr(irrm_temp); + IRExpr* result; + IRExpr* highHalf = qop(singleElementOp, + irrm, + get_vr(v2, Ity_F64, 0), + get_vr(v3, Ity_F64, 0), + get_vr(v4, Ity_F64, 0)); + + if (isSingleElementOp) { + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), + mkU64(0ULL)); + } else { + IRExpr* lowHalf = qop(singleElementOp, + irrm, + get_vr(v2, Ity_F64, 1), + get_vr(v3, Ity_F64, 1), + get_vr(v4, Ity_F64, 1)); + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), + unop(Iop_ReinterpF64asI64, lowHalf)); + } + + put_vr_qw(v1, result); +} + +static const HChar * +s390_irgen_VFA(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + s390_insn_assert("vfa", m4 == 3); + s390x_vec_fp_binary_op(Iop_Add64Fx2, Iop_AddF64, v1, v2, v3, m4, m5); + return "vfa"; +} + +static const HChar * +s390_irgen_VFS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + s390_insn_assert("vfs", m4 == 3); + s390x_vec_fp_binary_op(Iop_Sub64Fx2, Iop_SubF64, v1, v2, v3, m4, m5); + return "vfs"; +} + +static const HChar * +s390_irgen_VFM(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + s390_insn_assert("vfm", m4 == 3); + s390x_vec_fp_binary_op(Iop_Mul64Fx2, Iop_MulF64, v1, v2, v3, m4, m5); + return "vfm"; +} + +static const HChar * +s390_irgen_VFD(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + s390_insn_assert("vfd", m4 == 3); + s390x_vec_fp_binary_op(Iop_Div64Fx2, Iop_DivF64, v1, v2, v3, m4, m5); + return "vfd"; +} + +static const HChar * +s390_irgen_VFSQ(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390_insn_assert("vfsq", m3 == 3); + s390x_vec_fp_unary_op(Iop_Sqrt64Fx2, Iop_SqrtF64, v1, v2, m3, m4); + + return "vfsq"; +} + +static const HChar * +s390_irgen_VFMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + s390_insn_assert("vfma", m6 == 3); + s390_vector_fp_mulAddOrSub(Iop_MAddF64, v1, v2, v3, v4, m5, m6); + return "vfma"; +} + +static const HChar * +s390_irgen_VFMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + s390_insn_assert("vfms", m6 == 3); + s390_vector_fp_mulAddOrSub(Iop_MSubF64, v1, v2, v3, v4, m5, m6); + return "vfms"; +} + +static const HChar * +s390_irgen_WFC(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390_insn_assert("wfc", m3 == 3); + s390_insn_assert("wfc", m4 == 0); + + IRTemp cc_vex = newTemp(Ity_I32); + assign(cc_vex, binop(Iop_CmpF64, + get_vr(v1, Ity_F64, 0), get_vr(v2, Ity_F64, 0))); + + IRTemp cc_s390 = newTemp(Ity_I32); + assign(cc_s390, convert_vex_bfpcc_to_s390(cc_vex)); + s390_cc_thunk_put1(S390_CC_OP_SET, cc_s390, False); + + return "wfc"; +} + +static const HChar * +s390_irgen_WFK(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390_irgen_WFC(v1, v2, m3, m4); + + return "wfk"; +} + +static const HChar * +s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + s390_insn_assert("vfce", m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + if (!s390_vr_is_cs_set(m6)) { + if (!isSingleElementOp) { + put_vr_qw(v1, binop(Iop_CmpEQ64Fx2, get_vr_qw(v2), get_vr_qw(v3))); + } else { + IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), + get_vr(v3, Ity_F64, 0)); + IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, + mkU32(Ircr_EQ)), + mkU64(0xffffffffffffffffULL), + mkU64(0ULL)); + put_vr_qw(v1, 
binop(Iop_64HLtoV128, result, mkU64(0ULL))); + } + } else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFCE; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = elementSize; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vfce"; +} + +static const HChar * +s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + vassert(m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + if (!s390_vr_is_cs_set(m6)) { + if (!isSingleElementOp) { + put_vr_qw(v1, binop(Iop_CmpLE64Fx2, get_vr_qw(v3), get_vr_qw(v2))); + } else { + IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), + get_vr(v3, Ity_F64, 0)); + IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, + mkU32(Ircr_GT)), + mkU64(0xffffffffffffffffULL), + mkU64(0ULL)); + put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); + } + } + else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFCH; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar elementSize = isSingleElementOp ? 
sizeof(ULong) : sizeof(V128); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = elementSize; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vfch"; +} + +static const HChar * +s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + s390_insn_assert("vfche", m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + if (!s390_vr_is_cs_set(m6)) { + if (!isSingleElementOp) { + put_vr_qw(v1, binop(Iop_CmpLT64Fx2, get_vr_qw(v3), get_vr_qw(v2))); + } + else { + IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v3, Ity_F64, 0), + get_vr(v2, Ity_F64, 0)); + IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, + mkU32(Ircr_LT)), + mkU64(0xffffffffffffffffULL), + mkU64(0ULL)); + put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); + } + } + else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFCHE; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = elementSize; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vfche"; +} + +static const HChar * +s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5) +{ + s390_insn_assert("vftci", m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFTCI; + details.v1 = v1; + details.v2 = v2; + details.i3 = i3; + details.m4 = m4; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); + d->nFxState = 2; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + + return "vftci"; +} + /* New insns are added here. 
If an insn is contingent on a facility being installed also check whether the list of supported facilities in function @@ -19357,6 +20060,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int rxb : 4; unsigned int op2 : 8; } VRR; + struct { + UInt op1 : 8; + UInt v1 : 4; + UInt v2 : 4; + UInt v3 : 4; + UInt : 4; + UInt m5 : 4; + UInt m4 : 4; + UInt m3 : 4; + UInt rxb : 4; + UInt op2 : 8; + } VRRa; struct { unsigned int op1 : 8; unsigned int v1 : 4; @@ -19369,6 +20084,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int rxb : 4; unsigned int op2 : 8; } VRRd; + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int v3 : 4; + unsigned int m6 : 4; + unsigned int : 4; + unsigned int m5 : 4; + unsigned int v4 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRRe; struct { unsigned int op1 : 8; unsigned int v1 : 4; @@ -19389,6 +20116,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int rxb : 4; unsigned int op2 : 8; } VRId; + struct { + UInt op1 : 8; + UInt v1 : 4; + UInt v2 : 4; + UInt i3 : 12; + UInt m5 : 4; + UInt m4 : 4; + UInt rxb : 4; + UInt op2 : 8; + } VRIe; struct { unsigned int op1 : 8; unsigned int v1 : 4; @@ -19974,7 +20711,10 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe70000000046ULL: s390_format_VRI_VIM(s390_irgen_VGM, ovl.fmt.VRI.v1, ovl.fmt.VRI.i2, ovl.fmt.VRI.m3, ovl.fmt.VRI.rxb); goto ok; - case 0xe7000000004aULL: /* VFTCI */ goto unimplemented; + case 0xe7000000004aULL: s390_format_VRI_VVIMM(s390_irgen_VFTCI, ovl.fmt.VRIe.v1, + ovl.fmt.VRIe.v2, ovl.fmt.VRIe.i3, + ovl.fmt.VRIe.m4, ovl.fmt.VRIe.m5, + ovl.fmt.VRIe.rxb); goto ok; case 0xe7000000004dULL: s390_format_VRI_VVIM(s390_irgen_VREP, ovl.fmt.VRI.v1, ovl.fmt.VRI.v3, ovl.fmt.VRI.i2, ovl.fmt.VRI.m3, ovl.fmt.VRI.rxb); goto ok; @@ -20087,19 +20827,27 @@ s390_decode_6byte_and_irgen(const UChar *bytes) ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xe70000000085ULL: /* VBPERM */ goto unimplemented; - case 0xe7000000008aULL: s390_format_VRRd_VVVVMM(s390_irgen_VSTRC, ovl.fmt.VRRd.v1, - ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, - ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, - ovl.fmt.VRRd.m6, - ovl.fmt.VRRd.rxb); goto ok; + case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.m6, + ovl.fmt.VRRd.rxb); goto ok; case 0xe7000000008cULL: s390_format_VRR_VVVV(s390_irgen_VPERM, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xe7000000008dULL: s390_format_VRR_VVVV(s390_irgen_VSEL, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; - case 0xe7000000008eULL: /* VFMS */ goto unimplemented; - case 0xe7000000008fULL: /* VFMA */ goto unimplemented; + case 0xe7000000008eULL: s390_format_VRR_VVVVMM(s390_irgen_VFMS, ovl.fmt.VRRe.v1, + ovl.fmt.VRRe.v2, ovl.fmt.VRRe.v3, + ovl.fmt.VRRe.v4, ovl.fmt.VRRe.m5, + ovl.fmt.VRRe.m6, + ovl.fmt.VRRe.rxb); goto ok; + case 0xe7000000008fULL: s390_format_VRR_VVVVMM(s390_irgen_VFMA, ovl.fmt.VRRe.v1, + ovl.fmt.VRRe.v2, ovl.fmt.VRRe.v3, + ovl.fmt.VRRe.v4, ovl.fmt.VRRe.m5, + ovl.fmt.VRRe.m6, + ovl.fmt.VRRe.rxb); goto ok; case 0xe70000000094ULL: s390_format_VRR_VVVM(s390_irgen_VPK, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; @@ -20184,17 +20932,50 @@ s390_decode_6byte_and_irgen(const UChar *bytes) ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, ovl.fmt.VRRd.v4, 
ovl.fmt.VRRd.m5, ovl.fmt.VRRd.rxb); goto ok; - case 0xe700000000c0ULL: /* VCLGD */ goto unimplemented; - case 0xe700000000c1ULL: /* VCDLG */ goto unimplemented; - case 0xe700000000c2ULL: /* VCGD */ goto unimplemented; - case 0xe700000000c3ULL: /* VCDG */ goto unimplemented; - case 0xe700000000c4ULL: /* VLDE */ goto unimplemented; - case 0xe700000000c5ULL: /* VLED */ goto unimplemented; - case 0xe700000000c7ULL: /* VFI */ goto unimplemented; - case 0xe700000000caULL: /* WFK */ goto unimplemented; - case 0xe700000000cbULL: /* WFC */ goto unimplemented; - case 0xe700000000ccULL: /* VFPSO */ goto unimplemented; - case 0xe700000000ceULL: /* VFSQ */ goto unimplemented; + case 0xe700000000c0ULL: s390_format_VRRa_VVMMM(s390_irgen_VCLGD, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c1ULL: s390_format_VRRa_VVMMM(s390_irgen_VCDLG, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c2ULL: s390_format_VRRa_VVMMM(s390_irgen_VCGD, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c3ULL: s390_format_VRRa_VVMMM(s390_irgen_VCDG, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VLDE, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VLED, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c7ULL: s390_format_VRRa_VVMMM(s390_irgen_VFI, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000caULL: s390_format_VRRa_VVMM(s390_irgen_WFK, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000cbULL: s390_format_VRRa_VVMM(s390_irgen_WFC, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000ccULL: s390_format_VRRa_VVMMM(s390_irgen_VFPSO, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000ceULL: s390_format_VRRa_VVMM(s390_irgen_VFSQ, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; case 0xe700000000d4ULL: s390_format_VRR_VVM(s390_irgen_VUPLL, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; @@ -20221,13 +21002,37 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe700000000dfULL: s390_format_VRR_VVM(s390_irgen_VLP, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; - case 0xe700000000e2ULL: /* VFS */ goto unimplemented; - case 0xe700000000e3ULL: /* VFA */ goto unimplemented; - case 0xe700000000e5ULL: /* VFD */ goto unimplemented; - case 0xe700000000e7ULL: /* VFM */ goto unimplemented; - case 0xe700000000e8ULL: /* VFCE */ goto unimplemented; - case 0xe700000000eaULL: /* VFCHE */ goto unimplemented; - case 0xe700000000ebULL: /* VFCH */ goto unimplemented; + case 0xe700000000e2ULL: s390_format_VRRa_VVVMM(s390_irgen_VFS, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + 
ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e3ULL: s390_format_VRRa_VVVMM(s390_irgen_VFA, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e5ULL: s390_format_VRRa_VVVMM(s390_irgen_VFD, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e7ULL: s390_format_VRRa_VVVMM(s390_irgen_VFM, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e8ULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCE, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000eaULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCHE, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000ebULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCH, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; case 0xe700000000eeULL: /* VFMIN */ goto unimplemented; case 0xe700000000efULL: /* VFMAX */ goto unimplemented; case 0xe700000000f0ULL: s390_format_VRR_VVVM(s390_irgen_VAVGL, ovl.fmt.VRR.v1, @@ -21148,7 +21953,13 @@ s390_decode_and_irgen(const UChar *bytes, UInt insn_length, DisResult *dres) dis_res->jk_StopHere = Ijk_Boring; } - if (status == S390_DECODE_OK) return insn_length; /* OK */ + if (status == S390_DECODE_OK) { + /* Adjust status if a specification exception was indicated. */ + if (is_specification_exception()) + status = S390_DECODE_SPECIFICATION_EXCEPTION; + else + return insn_length; /* OK */ + } /* Decoding failed somehow */ if (sigill_diag) { @@ -21166,6 +21977,10 @@ s390_decode_and_irgen(const UChar *bytes, UInt insn_length, DisResult *dres) vex_printf("unimplemented special insn: "); break; + case S390_DECODE_SPECIFICATION_EXCEPTION: + vex_printf("specification exception: "); + break; + case S390_DECODE_ERROR: vex_printf("decoding error: "); break; diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 98ac9384db..22cdd04251 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -1711,6 +1711,23 @@ emit_VRR_VVM(UChar *p, ULong op, UChar v1, UChar v2, UChar m4) return emit_6bytes(p, the_insn); } +static UChar * +emit_VRR_VVMMM(UChar *p, ULong op, UChar v1, UChar v2, UChar m3, UChar m4, + UChar m5) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + rxb = s390_update_rxb(rxb, 2, &v2); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)v2) << 32; + the_insn |= ((ULong)m5) << 20; + the_insn |= ((ULong)m4) << 16; + the_insn |= ((ULong)m3) << 12; + the_insn |= ((ULong)rxb) << 8; + + return emit_6bytes(p, the_insn); +} static UChar * emit_VRR_VVVM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4) @@ -1762,6 +1779,26 @@ emit_VRR_VVVV(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar v4) return emit_6bytes(p, the_insn); } +static UChar * +emit_VRRe_VVVVMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar v4, + UChar m5, UChar m6) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + rxb = s390_update_rxb(rxb, 2, &v2); + rxb = s390_update_rxb(rxb, 3, &v3); + rxb = s390_update_rxb(rxb, 4, &v4); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)v2) << 32; + the_insn |= ((ULong)v3) << 28; + the_insn |= 
((ULong)m6) << 24; + the_insn |= ((ULong)m5) << 16; + the_insn |= ((ULong)v4) << 12; + the_insn |= ((ULong)rxb) << 8; + + return emit_6bytes(p, the_insn); +} static UChar * emit_VRR_VRR(UChar *p, ULong op, UChar v1, UChar r2, UChar r3) @@ -1777,6 +1814,33 @@ emit_VRR_VRR(UChar *p, ULong op, UChar v1, UChar r2, UChar r3) return emit_6bytes(p, the_insn); } +static UChar * +emit_VRR_VVVMMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4, + UChar m5, UChar m6) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + rxb = s390_update_rxb(rxb, 2, &v2); + rxb = s390_update_rxb(rxb, 3, &v3); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)v2) << 32; + the_insn |= ((ULong)v3) << 28; + the_insn |= ((ULong)m6) << 20; + the_insn |= ((ULong)m5) << 16; + the_insn |= ((ULong)m4) << 12; + the_insn |= ((ULong)rxb) << 8; + + return emit_6bytes(p, the_insn); +} + +static UChar* +emit_VRR_VVVMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4, + UChar m5) +{ + return emit_VRR_VVVMMM(p, op, v1, v2, v3, m4, m5, 0); +} + /*------------------------------------------------------------*/ /*--- Functions to emit particular instructions ---*/ /*------------------------------------------------------------*/ @@ -6057,6 +6121,116 @@ s390_emit_VLVGP(UChar *p, UChar v1, UChar r2, UChar r3) return emit_VRR_VRR(p, 0xE70000000062ULL, v1, r2, r3); } +static UChar * +s390_emit_VFPSO(UChar *p, UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), "vfpso", v1, v2, m3, m4, + m5); + + return emit_VRR_VVMMM(p, 0xE700000000CCULL, v1, v2, m3, m4, m5); +} + +static UChar * +s390_emit_VFA(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfa", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e3ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfs", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e2ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFM(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfm", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e7ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFD(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfd", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e5ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFSQ(UChar *p, UChar v1, UChar v2, UChar m3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vfsq", v1, v2, m3, m4); + + return emit_VRR_VVMMM(p, 0xE700000000CEULL, v1, v2, m3, m4, 0); +} + +static UChar * +s390_emit_VFMA(UChar *p, UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, + UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, VR, UINT, UINT), "vfma", + v1, v2, v3, v4, m5, m6); + + return emit_VRRe_VVVVMM(p, 0xE7000000008fULL, v1, v2, v3, v4, m5, m6); +} + +static UChar * +s390_emit_VFMS(UChar *p, UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, + UChar m6) 
+{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, VR, UINT, UINT), "vfms", + v1, v2, v3, v4, m5, m6); + + return emit_VRRe_VVVVMM(p, 0xE7000000008eULL, v1, v2, v3, v4, m5, m6); +} + +static UChar * +s390_emit_VFCE(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfce", + v1, v2, v3, m4, m5, m6); + + return emit_VRR_VVVMMM(p, 0xE700000000e8ULL, v1, v2, v3, m4, m5, m6); +} + +static UChar * +s390_emit_VFCH(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfch", + v1, v2, v3, m4, m5, m6); + + return emit_VRR_VVVMMM(p, 0xE700000000ebULL, v1, v2, v3, m4, m5, m6); +} + +static UChar * +s390_emit_VFCHE(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfche", + v1, v2, v3, m4, m5, m6); + + return emit_VRR_VVVMMM(p, 0xE700000000eaULL, v1, v2, v3, m4, m5, m6); +} + /*---------------------------------------------------------------*/ /*--- Constructors for the various s390_insn kinds ---*/ /*---------------------------------------------------------------*/ @@ -7201,7 +7375,6 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst, { s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn)); - vassert(size == 16); insn->tag = S390_INSN_VEC_TRIOP; insn->size = size; @@ -7508,6 +7681,18 @@ s390_insn_as_string(const s390_insn *insn) op = "v-vunpacku"; break; + case S390_VEC_FLOAT_NEG: + op = "v-vfloatneg"; + break; + + case S390_VEC_FLOAT_SQRT: + op = "v-vfloatsqrt"; + break; + + case S390_VEC_FLOAT_ABS: + op = "v-vfloatabs"; + break; + default: goto fail; } @@ -7880,6 +8065,13 @@ s390_insn_as_string(const s390_insn *insn) case S390_VEC_PWSUM_DW: op = "v-vpwsumdw"; break; case S390_VEC_PWSUM_QW: op = "v-vpwsumqw"; break; case S390_VEC_INIT_FROM_GPRS: op = "v-vinitfromgprs"; break; + case S390_VEC_FLOAT_ADD: op = "v-vfloatadd"; break; + case S390_VEC_FLOAT_SUB: op = "v-vfloatsub"; break; + case S390_VEC_FLOAT_MUL: op = "v-vfloatmul"; break; + case S390_VEC_FLOAT_DIV: op = "v-vfloatdiv"; break; + case S390_VEC_FLOAT_COMPARE_EQUAL: op = "v-vfloatcmpeq"; break; + case S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL: op = "v-vfloatcmple"; break; + case S390_VEC_FLOAT_COMPARE_LESS: op = "v-vfloatcmpl"; break; default: goto fail; } s390_sprintf(buf, "%M %R, %R, %R", op, insn->variant.vec_binop.dst, @@ -7889,6 +8081,8 @@ s390_insn_as_string(const s390_insn *insn) case S390_INSN_VEC_TRIOP: switch (insn->variant.vec_triop.tag) { case S390_VEC_PERM: op = "v-vperm"; break; + case S390_VEC_FLOAT_MADD: op = "v-vfloatmadd"; break; + case S390_VEC_FLOAT_MSUB: op = "v-vfloatmsub"; break; default: goto fail; } s390_sprintf(buf, "%M %R, %R, %R, %R", op, insn->variant.vec_triop.dst, @@ -9036,6 +9230,27 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn) return s390_emit_VPOPCT(buf, v1, v2, s390_getM_from_size(insn->size)); } + case S390_VEC_FLOAT_NEG: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size == 8); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 0); + } + case S390_VEC_FLOAT_ABS: { + vassert(insn->variant.unop.src.tag == 
S390_OPND_REG); + vassert(insn->size == 8); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 2); + } + case S390_VEC_FLOAT_SQRT: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size == 8); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFSQ(buf, v1, v2, s390_getM_from_size(insn->size), 0); + } default: vpanic("s390_insn_unop_emit"); } @@ -11049,6 +11264,21 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn) return s390_emit_VSUMQ(buf, v1, v2, v3, s390_getM_from_size(size)); case S390_VEC_INIT_FROM_GPRS: return s390_emit_VLVGP(buf, v1, v2, v3); + case S390_VEC_FLOAT_ADD: + return s390_emit_VFA(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_SUB: + return s390_emit_VFS(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_MUL: + return s390_emit_VFM(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_DIV: + return s390_emit_VFD(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_COMPARE_EQUAL: + return s390_emit_VFCE(buf, v1, v2, v3, s390_getM_from_size(size), 0, 0); + case S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL: + return s390_emit_VFCH(buf, v1, v3, v2, s390_getM_from_size(size), 0, 0); + case S390_VEC_FLOAT_COMPARE_LESS: + return s390_emit_VFCHE(buf, v1, v3, v2, s390_getM_from_size(size), 0, 0); + default: goto fail; } @@ -11070,8 +11300,14 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn) UChar v4 = hregNumber(insn->variant.vec_triop.op3); switch (tag) { - case S390_VEC_PERM: + case S390_VEC_PERM: { + vassert(insn->size == 16); return s390_emit_VPERM(buf, v1, v2, v3, v4); + } + case S390_VEC_FLOAT_MADD: + return s390_emit_VFMA(buf, v1, v2, v3, v4, 0, 3); + case S390_VEC_FLOAT_MSUB: + return s390_emit_VFMS(buf, v1, v2, v3, v4, 0, 3); default: goto fail; } diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 7ea01010e2..40f0472a2f 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -202,7 +202,10 @@ typedef enum { S390_VEC_ABS, S390_VEC_COUNT_LEADING_ZEROES, S390_VEC_COUNT_TRAILING_ZEROES, - S390_VEC_COUNT_ONES + S390_VEC_COUNT_ONES, + S390_VEC_FLOAT_NEG, + S390_VEC_FLOAT_ABS, + S390_VEC_FLOAT_SQRT } s390_unop_t; /* The kind of ternary BFP operations */ @@ -394,11 +397,20 @@ typedef enum { S390_VEC_PWSUM_QW, S390_VEC_INIT_FROM_GPRS, + S390_VEC_FLOAT_ADD, + S390_VEC_FLOAT_SUB, + S390_VEC_FLOAT_MUL, + S390_VEC_FLOAT_DIV, + S390_VEC_FLOAT_COMPARE_EQUAL, + S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL, + S390_VEC_FLOAT_COMPARE_LESS } s390_vec_binop_t; /* The vector operations with three operands */ typedef enum { - S390_VEC_PERM + S390_VEC_PERM, + S390_VEC_FLOAT_MADD, + S390_VEC_FLOAT_MSUB } s390_vec_triop_t; /* The details of a CDAS insn. 
Carved out to keep the size of diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c index bc34f90ff3..79581ff7aa 100644 --- a/VEX/priv/host_s390_isel.c +++ b/VEX/priv/host_s390_isel.c @@ -787,10 +787,12 @@ get_bfp_rounding_mode(ISelEnv *env, IRExpr *irrm) IRRoundingMode mode = irrm->Iex.Const.con->Ico.U32; switch (mode) { - case Irrm_NEAREST: return S390_BFP_ROUND_NEAREST_EVEN; - case Irrm_ZERO: return S390_BFP_ROUND_ZERO; - case Irrm_PosINF: return S390_BFP_ROUND_POSINF; - case Irrm_NegINF: return S390_BFP_ROUND_NEGINF; + case Irrm_NEAREST_TIE_AWAY_0: return S390_BFP_ROUND_NEAREST_AWAY; + case Irrm_PREPARE_SHORTER: return S390_BFP_ROUND_PREPARE_SHORT; + case Irrm_NEAREST: return S390_BFP_ROUND_NEAREST_EVEN; + case Irrm_ZERO: return S390_BFP_ROUND_ZERO; + case Irrm_PosINF: return S390_BFP_ROUND_POSINF; + case Irrm_NegINF: return S390_BFP_ROUND_NEGINF; default: vpanic("get_bfp_rounding_mode"); } @@ -3871,6 +3873,17 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) vec_op = S390_VEC_COUNT_ONES; goto Iop_V_wrk; + case Iop_Neg64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_NEG; + goto Iop_V_wrk; + + case Iop_Abs64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_ABS; + goto Iop_V_wrk; + + Iop_V_wrk: { dst = newVRegV(env); reg1 = s390_isel_vec_expr(env, arg); @@ -4388,6 +4401,28 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) vec_op = S390_VEC_ELEM_ROLL_V; goto Iop_VV_wrk; + case Iop_CmpEQ64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_COMPARE_EQUAL; + goto Iop_VV_wrk; + + case Iop_CmpLE64Fx2: { + size = 8; + vec_op = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL; + goto Iop_VV_wrk; + } + + case Iop_CmpLT64Fx2: { + size = 8; + vec_op = S390_VEC_FLOAT_COMPARE_LESS; + goto Iop_VV_wrk; + } + + case Iop_Sqrt64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_SQRT; + goto Iop_irrm_V_wrk; + case Iop_ShlN8x16: size = 1; shift_op = S390_VEC_ELEM_SHL_INT; @@ -4493,6 +4528,14 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) return dst; } + Iop_irrm_V_wrk: { + set_bfp_rounding_mode_in_fpc(env, arg1); + reg1 = s390_isel_vec_expr(env, arg2); + + addInstr(env, s390_insn_unop(size, vec_op, dst, s390_opnd_reg(reg1))); + return dst; + } + case Iop_64HLtoV128: reg1 = s390_isel_int_expr(env, arg1); reg2 = s390_isel_int_expr(env, arg2); @@ -4516,6 +4559,7 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) IRExpr* arg1 = expr->Iex.Triop.details->arg1; IRExpr* arg2 = expr->Iex.Triop.details->arg2; IRExpr* arg3 = expr->Iex.Triop.details->arg3; + IROp vec_op; switch (op) { case Iop_SetElem8x16: size = 1; @@ -4551,6 +4595,36 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) dst, reg1, reg2, reg3)); return dst; + case Iop_Add64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_ADD; + goto Iop_irrm_VV_wrk; + + case Iop_Sub64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_SUB; + goto Iop_irrm_VV_wrk; + + case Iop_Mul64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_MUL; + goto Iop_irrm_VV_wrk; + case Iop_Div64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_DIV; + goto Iop_irrm_VV_wrk; + + Iop_irrm_VV_wrk: { + set_bfp_rounding_mode_in_fpc(env, arg1); + reg1 = s390_isel_vec_expr(env, arg2); + reg2 = s390_isel_vec_expr(env, arg3); + + addInstr(env, s390_insn_vec_binop(size, vec_op, + dst, reg1, reg2)); + + return dst; + } + default: goto irreducible; } -- 2.47.2
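
Illustrative note (not part of the patch above): the sketch below shows, under stated assumptions, how a 6-byte VRR-c instruction word such as VFCE is assembled from its fields, using the same shift amounts as emit_VRR_VVVMMM() in host_s390_defs.c. The helper update_rxb() here is a hypothetical stand-in for s390_update_rxb(), whose body does not appear in this patch; it assumes the usual z/Architecture RXB convention, where each 4-bit vector-register field carries only the low four bits of the register number and the corresponding RXB bit supplies the fifth bit for registers 16-31.

/* Sketch only: assemble a VRR-c instruction word the way the emitter in
 * this patch does.  update_rxb() is a hypothetical model of
 * s390_update_rxb(), assuming the standard RXB convention. */

#include <stdio.h>

typedef unsigned char      UChar;
typedef unsigned long long ULong;

/* If the register number needs a fifth bit, set the RXB bit for operand
   field `field_no` (1..4) and strip the register number to its low 4 bits. */
static ULong
update_rxb(ULong rxb, UChar field_no, UChar *vr)
{
   if (*vr >= 16) {
      rxb |= 1ULL << (4 - field_no);
      *vr &= 0xF;
   }
   return rxb;
}

/* Assemble a VRR-c instruction; the shifts match emit_VRR_VVVMMM(). */
static ULong
assemble_vrr_c(ULong opcode_template, UChar v1, UChar v2, UChar v3,
               UChar m4, UChar m5, UChar m6)
{
   ULong the_insn = opcode_template;
   ULong rxb = update_rxb(0, 1, &v1);
   rxb = update_rxb(rxb, 2, &v2);
   rxb = update_rxb(rxb, 3, &v3);

   the_insn |= ((ULong)v1)  << 36;
   the_insn |= ((ULong)v2)  << 32;
   the_insn |= ((ULong)v3)  << 28;
   the_insn |= ((ULong)m6)  << 20;
   the_insn |= ((ULong)m5)  << 16;
   the_insn |= ((ULong)m4)  << 12;
   the_insn |= ((ULong)rxb) << 8;
   return the_insn;
}

int main(void)
{
   /* VFCE V31,V17,V2 with m4 = 3 (double precision), m5 = m6 = 0 */
   ULong insn = assemble_vrr_c(0xE700000000E8ULL, 31, 17, 2, 3, 0, 0);

   /* Print the 6 instruction bytes, most significant first. */
   for (int i = 5; i >= 0; i--)
      printf("%02llx ", (insn >> (8 * i)) & 0xFF);
   printf("\n");
   return 0;
}

Running this prints "e7 f1 20 00 3c e8": the V1 and V2 fields hold only the low four bits of registers 31 and 17, while RXB comes out as 0b1100, consistent with the field layout used by the emitter and decoder structures in the patch.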