insn->variant.unop.dst, vreg_opnd);
}
+ /* v-vrep <reg>,<vreg>,<idx> */
+ if (insn->tag == S390_INSN_VEC_REPLICATE
+ && sameHReg(insn->variant.vec_replicate.op1, vreg)) {
+ vreg_am->d += insn->size * insn->variant.vec_replicate.idx;
+ return s390_insn_unop(insn->size, S390_VEC_DUPLICATE,
+ insn->variant.vec_replicate.dst, vreg_opnd);
+ }
+
no_match:
return NULL;
}
addHRegUse(u, HRmRead, insn->variant.vec_triop.op3);
break;
+ case S390_INSN_VEC_REPLICATE:
+ addHRegUse(u, HRmWrite, insn->variant.vec_replicate.dst);
+ addHRegUse(u, HRmRead, insn->variant.vec_replicate.op1);
+ break;
+
default:
vpanic("s390_insn_get_reg_usage");
}
insn->variant.vec_triop.op3 =
lookupHRegRemap(m, insn->variant.vec_triop.op3);
break;
+
+ case S390_INSN_VEC_REPLICATE:
+ insn->variant.vec_replicate.dst =
+ lookupHRegRemap(m, insn->variant.vec_replicate.dst);
+ insn->variant.vec_replicate.op1 =
+ lookupHRegRemap(m, insn->variant.vec_replicate.op1);
+ break;
+
default:
vpanic("s390_insn_map_regs");
}
static UChar *
-emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
+emit_VRI_VIM(UChar *p, ULong op, UChar v1, UShort i2, UChar m3)
+{ /* Build an instruction word from the opcode template OP by OR-ing in
+   * V1, I2, M3 and the RXB value at fixed bit positions, then pass it
+   * together with P to emit_6bytes and return that call's result. */
+ ULong the_insn = op;
+ ULong rxb = s390_update_rxb(0, 1, &v1); /* receives &v1, so v1 may be changed before it is shifted below; presumably the register number's high bit is moved into RXB - confirm against s390_update_rxb */
+
+ the_insn |= ((ULong)v1) << 36;
+ the_insn |= ((ULong)i2) << 16;
+ the_insn |= ((ULong)m3) << 12;
+ the_insn |= ((ULong)rxb)<< 8;
+
+ return emit_6bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_VRI_VVMM(UChar *p, ULong op, UChar v1, UChar v3, UShort i2, UChar m4)
+{ /* Build an instruction word from the opcode template OP by OR-ing in
+   * two vector register numbers (V1 at bit 36, V3 at bit 32), I2 at
+   * bit 16, M4 at bit 12 and the RXB value at bit 8, then pass it
+   * together with P to emit_6bytes and return that call's result. */
+ ULong the_insn = op;
+ ULong rxb = s390_update_rxb(0, 1, &v1); /* receives &v1, so v1 may be changed before it is shifted below - confirm against s390_update_rxb */
+ rxb = s390_update_rxb(rxb, 2, &v3); /* second call folds V3 (position argument 2) into the RXB value already computed for V1 */
+
+ the_insn |= ((ULong)v1) << 36;
+ the_insn |= ((ULong)v3) << 32;
+ the_insn |= ((ULong)i2) << 16;
+ the_insn |= ((ULong)m4) << 12;
+ the_insn |= ((ULong)rxb) << 8;
+
+ return emit_6bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2, UChar m3)
{
ULong the_insn = op;
ULong rxb = s390_update_rxb(0, 1, &v1);
the_insn |= ((ULong)x2) << 32;
the_insn |= ((ULong)b2) << 28;
the_insn |= ((ULong)d2) << 16;
+ the_insn |= ((ULong)m3) << 12;
the_insn |= ((ULong)rxb)<< 8;
return emit_6bytes(p, the_insn);
if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
s390_disasm(ENC3(MNM, VR, UDXB), "vl", v1, d2, x2, b2);
- return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2);
+ return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2, 0);
}
static UChar *
}
+static UChar *
+s390_emit_VLREP(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2, UShort m3)
+{ /* Emit one instruction with opcode template 0xE70000000005 through
+   * emit_VRX and return its result.  When VEX_TRACE_ASM is set in
+   * vex_traceflags the instruction is first reported as "vlrep" via
+   * s390_disasm.
+   * NOTE(review): m3 is declared UShort here while emit_VRX takes a
+   * UChar m3, so the value is narrowed at the call below. */
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+ s390_disasm(ENC4(MNM, VR, UDXB, UINT), "vlrep", v1, d2, x2, b2, m3);
+
+ return emit_VRX(p, 0xE70000000005ULL, v1, x2, b2, d2, m3);
+}
+
+
static UChar *
s390_emit_VST(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
{
if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
s390_disasm(ENC3(MNM, VR, UDXB), "vst", v1, d2, x2, b2);
- return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2);
+ return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2, 0); /* trailing 0 is emit_VRX's added m3 argument; zero leaves the bits it ORs in at position 12 clear */
}
static UChar *
-s390_emit_VREP(UChar *p, UChar v1, UChar v3, UChar m3)
+s390_emit_VREP(UChar *p, UChar v1, UChar v3, UShort i2, UChar m4) /* i2 is new: the callers in this change pass an element index; m4 receives the result of s390_getM_from_size */
{
if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
- s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, 0, m3);
+ s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, i2, m4); /* trace now prints the real i2 where a literal 0 was printed before */
- return emit_VRR_VVM(p, 0xE7000000004DULL, v1, v3, m3);
+ return emit_VRI_VVMM(p, 0xE7000000004DULL, v1, v3, i2, m4); /* same opcode template as before, but encoded by the helper that also places i2 */
}
+static UChar *
+s390_emit_VREPI(UChar *p, UChar v1, UShort i2, UChar m3)
+{ /* Emit one instruction with opcode template 0xE70000000045 through
+   * emit_VRI_VIM and return its result.  When VEX_TRACE_ASM is set in
+   * vex_traceflags the instruction is first reported as "vrepi" via
+   * s390_disasm. */
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+ s390_disasm(ENC4(MNM, VR, UINT, UINT), "vrepi", v1, i2, m3);
+
+ return emit_VRI_VIM(p, 0xE70000000045ULL, v1, i2, m3);
+}
+
static UChar *
s390_emit_VUPH(UChar *p, UChar v1, UChar v3, UChar m3)
return insn;
}
+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1,
+ UChar idx)
+{ /* Allocate an s390_insn with LibVEX_Alloc_inline and fill it in as an
+   * S390_INSN_VEC_REPLICATE: SIZE goes to insn->size, DST / OP1 / IDX to
+   * the vec_replicate variant.  No other field is written here.
+   * Elsewhere in this change IDX is multiplied by the insn size to form
+   * a byte offset and is passed to "vrep" as its index operand. */
+ s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn));
+
+ insn->tag = S390_INSN_VEC_REPLICATE;
+ insn->size = size;
+ insn->variant.vec_replicate.dst = dst;
+ insn->variant.vec_replicate.op1 = op1;
+ insn->variant.vec_replicate.idx = idx;
+
+ return insn;
+}
+
/*---------------------------------------------------------------*/
/*--- Debug print ---*/
/*---------------------------------------------------------------*/
insn->variant.vec_triop.op3);
break;
+ case S390_INSN_VEC_REPLICATE:
+ s390_sprintf(buf, "%M %R, %R, %I", "v-vrep",
+ insn->variant.vec_replicate.dst,
+ insn->variant.vec_replicate.op1,
+ insn->variant.vec_replicate.idx);
+ break;
+
default: goto fail;
}
}
+static UChar *
+s390_vec_duplicate_emit(UChar *buf, const s390_insn *insn)
+{ /* Emit the code for a unop insn that duplicates its source into the
+   * vector register unop.dst.  The sequence depends on the source
+   * operand's tag:
+   *   AMODE      one "vlrep" when the displacement passes
+   *              fits_unsigned_12bit; otherwise the value is loaded into
+   *              R0 and the GPR path is taken.
+   *   IMMEDIATE  one "vrepi" when the value passes
+   *              ulong_fits_signed_16bit; otherwise the value is loaded
+   *              into R0 and the GPR path is taken.
+   *   REG        the GPR path directly.
+   * Returns the pointer produced by the last emitter called.  Any other
+   * tag leaves the switch and reaches the vpanic. */
+ UChar v1 = hregNumber(insn->variant.unop.dst);
+ s390_opnd_RMI opnd = insn->variant.unop.src;
+ UChar r2; /* register number consumed at duplicate_from_gpr; set on every path that reaches the label */
+
+ switch (opnd.tag) {
+ case S390_OPND_AMODE: {
+ s390_amode* am = opnd.variant.am;
+ UInt b = hregNumber(am->b);
+ UInt x = hregNumber(am->x);
+ UInt d = am->d;
+
+ if (fits_unsigned_12bit(d)) {
+ return s390_emit_VLREP(buf, v1, x, b, d,
+ s390_getM_from_size(insn->size));
+ }
+ buf = s390_emit_load_mem(buf, insn->size, R0, am); /* displacement did not pass the 12-bit check: go through R0 instead */
+ r2 = R0;
+ goto duplicate_from_gpr; /* the label sits inside the S390_OPND_REG case below */
+ }
+
+ case S390_OPND_IMMEDIATE: {
+ ULong val = opnd.variant.imm;
+
+ if (ulong_fits_signed_16bit(val)) {
+ return s390_emit_VREPI(buf, v1, val, s390_getM_from_size(insn->size));
+ }
+ buf = s390_emit_load_64imm(buf, R0, val); /* value did not pass the 16-bit check: materialise it in R0 */
+ r2 = R0;
+ goto duplicate_from_gpr;
+ }
+
+ case S390_OPND_REG:
+ r2 = hregNumber(opnd.variant.reg);
+
+ duplicate_from_gpr: /* reached by fall-in from the REG case and by goto from the two cases above */
+ buf = s390_emit_VLVGP(buf, v1, r2, r2); /* the same register is passed for both source operands */
+ if (insn->size != 8) { /* for size 8 the VLVGP result is returned as is */
+ buf = s390_emit_VREP(buf, v1, v1, 8 / insn->size - 1, /* index of the last size-byte element within the first 8 bytes, assuming elements are numbered from byte 0 - TODO confirm */
+ s390_getM_from_size(insn->size));
+ }
+ return buf;
+ }
+
+ vpanic("s390_vec_duplicate_emit");
+}
+
+
static UChar *
s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
{
UShort i2 = insn->variant.unop.src.variant.imm;
return s390_emit_VGBM(buf, v1, i2);
}
- case S390_VEC_DUPLICATE: {
- vassert(insn->variant.unop.src.tag == S390_OPND_REG);
- UChar v1 = hregNumber(insn->variant.unop.dst);
- UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
- return s390_emit_VREP(buf, v1, v2, s390_getM_from_size(insn->size));
- }
+ case S390_VEC_DUPLICATE: return s390_vec_duplicate_emit(buf, insn);
case S390_VEC_UNPACKLOWS: {
vassert(insn->variant.unop.src.tag == S390_OPND_REG);
vassert(insn->size < 8);
}
+static UChar *
+s390_insn_vec_replicate_emit(UChar *buf, const s390_insn *insn)
+{ /* Emit the "vrep" for an S390_INSN_VEC_REPLICATE insn and return the
+   * emitter's result.  The register numbers come from vec_replicate.dst
+   * and vec_replicate.op1, the index from vec_replicate.idx, and the
+   * last operand from s390_getM_from_size(insn->size). */
+ UChar v1 = hregNumber(insn->variant.vec_replicate.dst);
+ UChar v2 = hregNumber(insn->variant.vec_replicate.op1);
+ UShort idx = (UShort) insn->variant.vec_replicate.idx; /* widened to match s390_emit_VREP's UShort i2 parameter */
+ return s390_emit_VREP(buf, v1, v2, idx, s390_getM_from_size(insn->size));
+}
+
+
Int
emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
Bool mode64, VexEndness endness_host,
case S390_INSN_VEC_TRIOP:
end = s390_insn_vec_triop_emit(buf, insn);
break;
+
+ case S390_INSN_VEC_REPLICATE:
+ end = s390_insn_vec_replicate_emit(buf, insn);
+ break;
+
fail:
default:
vpanic("emit_S390Instr");
}
/* --------- UNARY OP --------- */
case Iex_Unop: {
- UChar size_for_int_arg = 0;
HReg dst = INVALID_HREG;
HReg reg1 = INVALID_HREG;
s390_unop_t vec_unop = S390_UNOP_T_INVALID;
s390_vec_binop_t vec_binop = S390_VEC_BINOP_T_INVALID;
IROp op = expr->Iex.Unop.op;
+ IROp arg_op = Iop_INVALID;
IRExpr* arg = expr->Iex.Unop.arg;
switch(op) {
case Iop_NotV128:
}
case Iop_Dup8x16:
- size = size_for_int_arg = 1;
- vec_unop = S390_VEC_DUPLICATE;
- goto Iop_V_int_wrk;
+ size = 1;
+ arg_op = Iop_GetElem8x16;
+ goto Iop_V_dup_wrk;
case Iop_Dup16x8:
- size = size_for_int_arg = 2;
- vec_unop = S390_VEC_DUPLICATE;
- goto Iop_V_int_wrk;
+ size = 2;
+ arg_op = Iop_GetElem16x8;
+ goto Iop_V_dup_wrk;
case Iop_Dup32x4:
- size = size_for_int_arg = 4;
- vec_unop = S390_VEC_DUPLICATE;
- goto Iop_V_int_wrk;
+ size = 4;
+ arg_op = Iop_GetElem32x4;
+ goto Iop_V_dup_wrk;
+
+ Iop_V_dup_wrk: { /* Shared tail of the Iop_Dup8x16 / Dup16x8 / Dup32x4
+   * cases, which set size and arg_op before jumping here.  Two shapes
+   * of the argument are handled:
+   *   - arg is a binop whose op equals arg_op and whose second operand
+   *     is a constant: select the vector in its first operand and emit a
+   *     vec_replicate with the constant as index;
+   *   - anything else: select arg as an RMI operand and emit a
+   *     S390_VEC_DUPLICATE unop on it.
+   * Either way the freshly allocated dst is returned. */
+ dst = newVRegV(env);
+ if (arg->tag == Iex_Binop && arg->Iex.Binop.op == arg_op &&
+ arg->Iex.Binop.arg2->tag == Iex_Const) {
+ ULong idx;
+ idx = get_const_value_as_ulong(arg->Iex.Binop.arg2-> Iex.Const.con);
+ reg1 = s390_isel_vec_expr(env, arg->Iex.Binop.arg1);
+ addInstr(env, s390_insn_vec_replicate(size, dst, reg1, (UChar)idx)); /* NOTE(review): idx is narrowed from ULong to UChar with no range check - confirm the constant is always a valid element index */
+ } else {
+ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
+ addInstr(env, s390_insn_unop(size, S390_VEC_DUPLICATE, dst, src));
+ }
+ return dst;
+ }
case Iop_Widen8Sto16x8:
size = 1;
- size_for_int_arg = 8;
vec_unop = S390_VEC_UNPACKLOWS;
- goto Iop_V_int_wrk;
+ goto Iop_V_widen_wrk;
case Iop_Widen16Sto32x4:
size = 2;
- size_for_int_arg = 8;
vec_unop = S390_VEC_UNPACKLOWS;
- goto Iop_V_int_wrk;
+ goto Iop_V_widen_wrk;
case Iop_Widen32Sto64x2:
size = 4;
- size_for_int_arg = 8;
vec_unop = S390_VEC_UNPACKLOWS;
- goto Iop_V_int_wrk;
+ goto Iop_V_widen_wrk;
case Iop_Widen8Uto16x8:
size = 1;
- size_for_int_arg = 8;
vec_unop = S390_VEC_UNPACKLOWU;
- goto Iop_V_int_wrk;
+ goto Iop_V_widen_wrk;
case Iop_Widen16Uto32x4:
size = 2;
- size_for_int_arg = 8;
vec_unop = S390_VEC_UNPACKLOWU;
- goto Iop_V_int_wrk;
+ goto Iop_V_widen_wrk;
case Iop_Widen32Uto64x2:
size = 4;
- size_for_int_arg = 8;
vec_unop = S390_VEC_UNPACKLOWU;
- goto Iop_V_int_wrk;
-
- Iop_V_int_wrk: {
- HReg vr1 = vec_generate_zeroes(env);
- s390_amode* amode2 = s390_isel_amode(env, IRExpr_Const(IRConst_U64(0)));
- reg1 = s390_isel_int_expr(env, arg);
+ goto Iop_V_widen_wrk;
+ Iop_V_widen_wrk: {
vassert(vec_unop != S390_UNOP_T_INVALID);
- addInstr(env,
- s390_insn_vec_amodeintop(size_for_int_arg, S390_VEC_SET_ELEM,
- vr1, amode2, reg1));
-
+ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
+ HReg vr1 = newVRegV(env);
+ addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, vr1, src));
dst = newVRegV(env);
addInstr(env, s390_insn_unop(size, vec_unop, dst, s390_opnd_reg(vr1)));
return dst;