vassert(cond != Acc_ALWAYS);
return i;
}
-AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst ) {
- AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
- i->tag = Ain_MovZLQ;
- i->Ain.MovZLQ.src = src;
- i->Ain.MovZLQ.dst = dst;
+AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_MovxLQ;
+ i->Ain.MovxLQ.syned = syned;
+ i->Ain.MovxLQ.src = src;
+ i->Ain.MovxLQ.dst = dst;
return i;
}
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
vex_printf(",");
ppHRegAMD64(i->Ain.CMov64.dst);
return;
- case Ain_MovZLQ:
- vex_printf("movzlq ");
- ppHRegAMD64_lo32(i->Ain.MovZLQ.src);
+ case Ain_MovxLQ:
+ vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
+ ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
vex_printf(",");
- ppHRegAMD64(i->Ain.MovZLQ.dst);
+ ppHRegAMD64(i->Ain.MovxLQ.dst);
return;
case Ain_LoadEX:
if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
return;
- case Ain_MovZLQ:
- addHRegUse(u, HRmRead, i->Ain.MovZLQ.src);
- addHRegUse(u, HRmWrite, i->Ain.MovZLQ.dst);
+ case Ain_MovxLQ:
+ addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
+ addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
return;
case Ain_LoadEX:
addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
mapRegs_AMD64RM(m, i->Ain.CMov64.src);
mapReg(m, &i->Ain.CMov64.dst);
return;
- case Ain_MovZLQ:
- mapReg(m, &i->Ain.MovZLQ.src);
- mapReg(m, &i->Ain.MovZLQ.dst);
+ case Ain_MovxLQ:
+ mapReg(m, &i->Ain.MovxLQ.src);
+ mapReg(m, &i->Ain.MovxLQ.dst);
return;
case Ain_LoadEX:
mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
}
break;
- case Ain_MovZLQ:
- /* Produce a 32-bit reg-reg move, since the implicit zero-extend
- does what we want. */
- *p++ = clearWBit (
- rexAMode_R(i->Ain.MovZLQ.src, i->Ain.MovZLQ.dst));
- *p++ = 0x89;
- p = doAMode_R(p, i->Ain.MovZLQ.src, i->Ain.MovZLQ.dst);
+ case Ain_MovxLQ:
+ /* The argument order passed to rexAMode_R/doAMode_R differs in
+ the S vs Z case because the encodings differ: movslq (0x63) is
+ "reg <- r/m", so the destination goes in the ModRM reg field,
+ whereas the 32-bit mov (0x89) used for zero-extension is
+ "r/m <- reg", so there it is the source. */
+ if (i->Ain.MovxLQ.syned) {
+ /* Need REX.W = 1 here, but rexAMode_R does that for us. */
+ *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
+ *p++ = 0x63;
+ p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
+ } else {
+ /* Produce a 32-bit reg-reg move, since the implicit
+ zero-extend does what we want. */
+ *p++ = clearWBit (
+ rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
+ *p++ = 0x89;
+ p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
+ }
goto done;
case Ain_LoadEX:
&& e->Iex.Unop.op == Iop_32Uto64
&& e->Iex.Unop.arg->tag == Iex_RdTmp) {
HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
- return AMD64Instr_MovZLQ(src, dst);
+ return AMD64Instr_MovxLQ(False, src, dst);
}
if (0) { ppIRExpr(e); vex_printf("\n"); }
Bool second_is_UInt;
MatchInfo mi;
- DECLARE_PATTERN(p_8Uto64);
DECLARE_PATTERN(p_1Uto8_64to1);
+ DECLARE_PATTERN(p_LDle8_then_8Uto64);
+ DECLARE_PATTERN(p_LDle16_then_16Uto64);
IRType ty = typeOfIRExpr(env->type_env,e);
-   vassert(ty == Ity_I32 || Ity_I16 || Ity_I8);
+   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
break;
case Iop_Shr32:
- addInstr(env, AMD64Instr_MovZLQ(dst,dst));
+ addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
break;
case Iop_Sar8:
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
break;
case Iop_Sar32:
- addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, dst));
- addInstr(env, AMD64Instr_Sh64(Ash_SAR, 32, dst));
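+ /* movslq sign-extends the low 32 bits of dst into the full
+ 64-bit register, replacing the SHL-by-32/SAR-by-32 pair. */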
+ addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
break;
default:
ppIROp(e->Iex.Binop.op);
HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
if (second_is_UInt)
- addInstr(env, AMD64Instr_MovZLQ(argR, argR));
+ addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
addInstr(env, mk_iMOVsd_RR(left64, rax));
addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
- addInstr(env, AMD64Instr_MovZLQ(rdx,rdx));
- addInstr(env, AMD64Instr_MovZLQ(rax,rax));
+ addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
+ addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
addInstr(env, mk_iMOVsd_RR(rax, dst));
addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
- addInstr(env, AMD64Instr_MovZLQ(lo32,lo32));
+ addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
addInstr(env, AMD64Instr_Alu64R(
Aalu_OR, AMD64RMI_Reg(lo32), hi32));
return hi32;
/* --------- UNARY OP --------- */
case Iex_Unop: {
- /* 32Uto64(8Uto32(expr8)) */
- DEFINE_PATTERN(p_8Uto64,
- unop(Iop_32Uto64, unop(Iop_8Uto32, bind(0)) ) );
- if (matchIRExpr(&mi,p_8Uto64,e)) {
- IRExpr* expr8 = mi.bindee[0];
- HReg dst = newVRegI(env);
- HReg src = iselIntExpr_R(env, expr8);
- addInstr(env, mk_iMOVsd_RR(src,dst) );
- addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
- addInstr(env, AMD64Instr_Sh64(Ash_SHR, 56, dst));
- return dst;
- }
/* 1Uto8(64to1(expr64)) */
- DEFINE_PATTERN( p_1Uto8_64to1,
- unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
- if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
- IRExpr* expr64 = mi.bindee[0];
- HReg dst = newVRegI(env);
- HReg src = iselIntExpr_R(env, expr64);
- addInstr(env, mk_iMOVsd_RR(src,dst) );
- addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
- AMD64RMI_Imm(1), dst));
- return dst;
+ {
+ DEFINE_PATTERN( p_1Uto8_64to1,
+ unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
+ if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
+ IRExpr* expr64 = mi.bindee[0];
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, expr64);
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
+ AMD64RMI_Imm(1), dst));
+ return dst;
+ }
}
-//.. /* 16Uto32(LDle(expr32)) */
-//.. {
-//.. DECLARE_PATTERN(p_LDle16_then_16Uto32);
-//.. DEFINE_PATTERN(p_LDle16_then_16Uto32,
-//.. unop(Iop_16Uto32,IRExpr_LDle(Ity_I16,bind(0))) );
-//.. if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
-//.. HReg dst = newVRegI(env);
-//.. X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
-//.. addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
-//.. return dst;
-//.. }
-//.. }
+ /* 8Uto64(LDle(expr64)) */
+ {
+ DEFINE_PATTERN(p_LDle8_then_8Uto64,
+ unop(Iop_8Uto64,
+ IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
+ if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
+ HReg dst = newVRegI(env);
+ AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
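+ /* LoadEX(1,False,..) is a zero-extending byte load (movzbq). */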
+ addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
+ return dst;
+ }
+ }
- switch (e->Iex.Unop.op) {
- case Iop_32Uto64: {
+ /* 16Uto64(LDle(expr64)) */
+ {
+ DEFINE_PATTERN(p_LDle16_then_16Uto64,
+ unop(Iop_16Uto64,
+ IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
+ if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
HReg dst = newVRegI(env);
- HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
- addInstr(env, AMD64Instr_MovZLQ(src,dst) );
+ AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
+ addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
return dst;
}
+ }
+
+ switch (e->Iex.Unop.op) {
+ case Iop_32Uto64:
case Iop_32Sto64: {
HReg dst = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
- UInt amt = 32;
- addInstr(env, mk_iMOVsd_RR(src,dst) );
- addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
- addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
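+ /* A single widening move now handles both ops: zero-extend
+ for 32Uto64, sign-extend (movslq) for 32Sto64. */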
+ addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
+ src, dst) );
return dst;
}
case Iop_128HIto64: {
HReg dst = newVRegI(env);
HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(pre,src));
- addInstr(env, AMD64Instr_MovZLQ(src,src));
+ addInstr(env, AMD64Instr_MovxLQ(False, src, src));
addInstr(env, mk_iMOVsd_RR(src,dst));
addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
if (e->Iex.CCall.retty == Ity_I64)
addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
else
- addInstr(env, AMD64Instr_MovZLQ(hregAMD64_RAX(), dst));
+ addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
return dst;
}
HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
HReg tmp = newVRegI(env);
AMD64RMI* rmi2 = AMD64RMI_Imm(0);
- addInstr(env, AMD64Instr_MovZLQ(r1,tmp));
+ addInstr(env, AMD64Instr_MovxLQ(False, r1, tmp));
addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,tmp));
return Acc_NZ;
}