From: Julian Seward
Date: Thu, 2 Jan 2020 08:23:46 +0000 (+0100)
Subject: amd64 back end: generate 32-bit shift instructions for 32-bit IR shifts.
X-Git-Tag: VALGRIND_3_16_0~155
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4eaa80103df9d1d396cc4b7427ea99faac11329d;p=thirdparty%2Fvalgrind.git

amd64 back end: generate 32-bit shift instructions for 32-bit IR shifts.

Until now these have been handled by possibly widening the value to 64 bits,
if necessary, followed by a 64-bit shift.  That wastes instructions and code
space.
---

diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index 29127c1258..3d237e112d 100644
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -626,6 +626,14 @@ AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
    i->Ain.Sh64.dst = dst;
    return i;
 }
+AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp op, UInt src, HReg dst ) {
+   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
+   i->tag = Ain_Sh32;
+   i->Ain.Sh32.op = op;
+   i->Ain.Sh32.src = src;
+   i->Ain.Sh32.dst = dst;
+   return i;
+}
 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
    AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
    i->tag = Ain_Test64;
@@ -1090,6 +1098,14 @@ void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
          vex_printf("$%d,", (Int)i->Ain.Sh64.src);
          ppHRegAMD64(i->Ain.Sh64.dst);
          return;
+      case Ain_Sh32:
+         vex_printf("%sl ", showAMD64ShiftOp(i->Ain.Sh32.op));
+         if (i->Ain.Sh32.src == 0)
+            vex_printf("%%cl,");
+         else
+            vex_printf("$%d,", (Int)i->Ain.Sh32.src);
+         ppHRegAMD64_lo32(i->Ain.Sh32.dst);
+         return;
       case Ain_Test64:
          vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
          ppHRegAMD64(i->Ain.Test64.dst);
@@ -1471,6 +1487,11 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
          if (i->Ain.Sh64.src == 0)
            addHRegUse(u, HRmRead, hregAMD64_RCX());
         return;
+      case Ain_Sh32:
+         addHRegUse(u, HRmModify, i->Ain.Sh32.dst);
+         if (i->Ain.Sh32.src == 0)
+           addHRegUse(u, HRmRead, hregAMD64_RCX());
+        return;
       case Ain_Test64:
         addHRegUse(u, HRmRead, i->Ain.Test64.dst);
         return;
@@ -1808,6 +1829,9 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
       case Ain_Sh64:
         mapReg(m, &i->Ain.Sh64.dst);
         return;
+      case Ain_Sh32:
+        mapReg(m, &i->Ain.Sh32.dst);
+        return;
       case Ain_Test64:
         mapReg(m, &i->Ain.Test64.dst);
         return;
@@ -2762,6 +2786,30 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
       }
       break;

+   case Ain_Sh32:
+      opc_cl = opc_imm = subopc = 0;
+      switch (i->Ain.Sh32.op) {
+         case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
+         case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
+         case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
+         default: goto bad;
+      }
+      if (i->Ain.Sh32.src == 0) {
+         rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
+         if (rex != 0x40) *p++ = rex;
+         *p++ = toUChar(opc_cl);
+         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
+         goto done;
+      } else {
+         rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
+         if (rex != 0x40) *p++ = rex;
+         *p++ = toUChar(opc_imm);
+         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
+         *p++ = (UChar)(i->Ain.Sh32.src);
+         goto done;
+      }
+      break;
+
    case Ain_Test64:
       /* testq sign-extend($imm32), %reg */
       *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
index 3dfa9fbf5c..e2ed2613bf 100644
--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h
@@ -359,7 +359,8 @@ typedef
       Ain_Imm64,       /* Generate 64-bit literal to register */
       Ain_Alu64R,      /* 64-bit mov/arith/logical, dst=REG */
       Ain_Alu64M,      /* 64-bit mov/arith/logical, dst=MEM */
-      Ain_Sh64,        /* 64-bit shift/rotate, dst=REG or MEM */
+      Ain_Sh64,        /* 64-bit shift, dst=REG */
+      Ain_Sh32,        /* 32-bit shift, dst=REG */
       Ain_Test64,      /* 64-bit test (AND, set flags, discard result) */
       Ain_Unary64,     /* 64-bit not and neg */
       Ain_Lea64,       /* 64-bit compute EA into a reg */
@@ -441,6 +442,11 @@ typedef
             UInt src; /* shift amount, or 0 means %cl */
             HReg dst;
          } Sh64;
+         struct {
+            AMD64ShiftOp op;
+            UInt src; /* shift amount, or 0 means %cl */
+            HReg dst;
+         } Sh32;
          struct {
             UInt imm32;
             HReg dst;
@@ -744,6 +750,7 @@ extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst );
 extern AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst );
 extern AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp, AMD64RMI*, HReg );
 extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, HReg );
+extern AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp, UInt, HReg );
 extern AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst );
 extern AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* );
 extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* );
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
index dfaabb4689..6b70e54789 100644
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -1030,9 +1030,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
          addInstr(env, mk_iMOVsd_RR(regL,dst));

-         /* Do any necessary widening for 32/16/8 bit operands */
+         /* Do any necessary widening for 16/8 bit operands.  Also decide on the
+            final width at which the shift is to be done. */
+         Bool shift64 = False;
          switch (e->Iex.Binop.op) {
             case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
+               shift64 = True;
                break;
             case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
                break;
@@ -1045,18 +1048,16 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
                                    Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
                break;
             case Iop_Shr32:
-               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
                break;
             case Iop_Sar8:
-               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
-               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SHL, 24, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SAR, 24, dst));
                break;
             case Iop_Sar16:
-               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
-               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SHL, 16, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SAR, 16, dst));
                break;
             case Iop_Sar32:
-               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
                break;
             default:
                ppIROp(e->Iex.Binop.op);
@@ -1071,14 +1072,23 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
             vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
             nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
             vassert(nshift >= 0);
-            if (nshift > 0)
+            if (nshift > 0) {
                /* Can't allow nshift==0 since that means %cl */
-               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
+               if (shift64) {
+                  addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
+               } else {
+                  addInstr(env, AMD64Instr_Sh32(shOp, nshift, dst));
+               }
+            }
          } else {
             /* General case; we have to force the amount into %cl. */
             HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
             addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
-            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
+            if (shift64) {
+               addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
+            } else {
+               addInstr(env, AMD64Instr_Sh32(shOp, 0/* %cl */, dst));
+            }
          }
          return dst;
      }
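
As a rough illustration of the saving described in the commit message, the
standalone sketch below re-creates, outside of VEX, the byte sequence the new
Ain_Sh32 emit case produces for a shift-by-immediate: an optional REX prefix
with the W bit cleared, the 0xC1 group-2 opcode, a register-direct ModRM byte
carrying the sub-opcode (5 for SHR, 7 for SAR, 4 for SHL, as in the patch),
and an 8-bit count.  The helper name encodeSh32Imm, the choice of %ecx and
the shift count of 3 are made up for the example; this is not code from the
patch itself.

/* Standalone sketch only: mirrors the encoding scheme of the Ain_Sh32
   case in emit_AMD64Instr, for a register-direct destination. */
#include <stdio.h>

typedef unsigned char UChar;

static int encodeSh32Imm ( UChar* p, int subopc, int regno, int count )
{
   int   n   = 0;
   UChar rex = 0x40 | ((regno >> 3) & 1);        /* REX.B only; W stays clear */
   if (rex != 0x40) p[n++] = rex;                /* omit a redundant 0x40 */
   p[n++] = 0xC1;                                /* Grp2 Ev,Ib opcode */
   p[n++] = 0xC0 | (subopc << 3) | (regno & 7);  /* ModRM, register-direct */
   p[n++] = (UChar)count;                        /* 8-bit shift amount */
   return n;
}

int main ( void )
{
   UChar buf[8];
   int   n = encodeSh32Imm(buf, 5/*SHR*/, 1/*%ecx*/, 3);
   /* Prints "c1 e9 03", i.e. shrl $3,%ecx -- three bytes, one insn. */
   for (int j = 0; j < n; j++) printf("%02x ", buf[j]);
   printf("\n");
   return 0;
}

By comparison, the previous lowering of Iop_Shr32 went through
AMD64Instr_MovxLQ plus a 64-bit Sh64, i.e. roughly movl %ecx,%ecx followed by
shrq $3,%rcx: two instructions and six bytes instead of one instruction and
three bytes.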