case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
{
- // JRS FIXME:
- // * is the host_endness conditional below actually necessary?
- // * can we just do a 64-bit load followed by by Iop_Reverse8sIn64_x1?
- // That would be a lot more efficient.
- IRExpr * nextAddr;
- IRTemp w3 = newTemp( Ity_I32 );
- IRTemp w4 = newTemp( Ity_I32 );
+ /* Caller makes sure we are only called in mode64. */
+
+ /* If we supported swapping LE/BE loads in the backend then we could
+ just load the value with the bytes reversed by doing a BE load
+ on an LE machine and a LE load on a BE machine.
+
+ IRTemp dw1 = newTemp(Ity_I64);
+ if (host_endness == VexEndnessBE)
+ assign( dw1, IRExpr_Load(Iend_LE, Ity_I64, mkexpr(EA)));
+ else
+ assign( dw1, IRExpr_Load(Iend_BE, Ity_I64, mkexpr(EA)));
+ putIReg( rD_addr, mkexpr(dw1) );
+
+ But since we currently don't, we load the value as is and then
+ switch it around with Iop_Reverse8sIn64_x1. */
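+
+ /* For example: if a plain 64-bit load of EA would yield
+    0x0102030405060708, the value written to rD is
+    0x0807060504030201. */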
+
+ IRTemp dw1 = newTemp(Ity_I64);
+ IRTemp dw2 = newTemp(Ity_I64);
DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
- assign( w1, load( Ity_I32, mkexpr( EA ) ) );
- assign( w2, gen_byterev32( w1 ) );
- nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
- ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
- assign( w3, load( Ity_I32, nextAddr ) );
- assign( w4, gen_byterev32( w3 ) );
- if (host_endness == VexEndnessLE)
- putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
- else
- putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
+ assign( dw1, load(Ity_I64, mkexpr(EA)) );
+ assign( dw2, unop(Iop_Reverse8sIn64_x1, mkexpr(dw1)) );
+ putIReg( rD_addr, mkexpr(dw2) );
break;
}
return rr;
}
+ case Iop_Reverse8sIn64_x1: {
+ /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
+ Can only be used in 64bit mode. */
+ vassert (mode64);
+
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ HReg rr = newVRegI(env);
+ HReg rMask = newVRegI(env);
+ HReg rnMask = newVRegI(env);
+ HReg rtHi = newVRegI(env);
+ HReg rtLo = newVRegI(env);
+
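+ /* Reverse the bytes of rr in three passes: swap adjacent bytes,
+    then adjacent halfwords, then the two word halves. */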
+ // Copy r_src since we need to modify it
+ addInstr(env, mk_iMOVds_RR(rr, r_src));
+
+ // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
+ addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
+ True/* 64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
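+ // bytes within each halfword are now swapped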
+
+ // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
+ addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
+ True/* 64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
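+ // halfwords within each word are now swapped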
+
+ // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
+ /* We don't need to mask anymore, just two more shifts and an or. */
+ addInstr(env, mk_iMOVds_RR(rtLo, rr));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 32)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+ rr, rr,
+ PPCRH_Imm(False/*!signed imm*/, 32)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
+
+ return rr;
+ }
+
case Iop_Left8:
case Iop_Left16:
case Iop_Left32: