UChar vB_addr = toUChar((theInstr >> 11) & 0x1F); /* theInstr[11:15] */
UInt opc2 = (theInstr >> 0) & 0x7FF; /* theInstr[0:10] */
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+
if (opc1 != 0x4){
   vex_printf("dis_av_shift(PPC32)(instr)\n");
   return False;
}
DIP(" => not implemented\n");
return False;
- case 0x2C4: // vsr (Shift Right, AV p252)
+ case 0x2C4: { // vsr (Shift Right, AV p251)
DIP("vsr v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
-
+ IRTemp sh = newTemp(Ity_I8);
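+      /* shift count is the low 3 bits of vB's least-significant byte
+         (the AV ISA expects every byte lane of vB to hold the same
+         count, else the result is undefined) */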
+ assign( sh, binop(Iop_And8, mkU8(0x7),
+ unop(Iop_32to8,
+ unop(Iop_V128to32, mkexpr(vB)))) );
+ putVReg( vD_addr,
+ binop(Iop_ShrV128, mkexpr(vA), mkexpr(sh)) );
+ break;
+ }
case 0x304: // vsrab (Shift Right Algebraic B, AV p253)
DIP("vsrab v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
DIP(" => not implemented\n");
DIP(" => not implemented\n");
return False;
- case 0x28C: // vspltw (Splat Word, AV p250)
+ case 0x28C: { // vspltw (Splat Word, AV p250)
DIP("vspltw v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
- DIP(" => not implemented\n");
- return False;
-
+ /* vD = Dup32x4( vB[UIMM_5] ) */
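+      /* e.g. UIMM_5==0 selects the leftmost word: shifting right by
+         96 bits leaves it in the low 32 bits of the vector */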
+      UInt sh_uimm = (3 - UIMM_5) * 32;
+ putVReg( vD_addr, unop(Iop_Dup32x4,
+ unop(Iop_V128to32,
+ binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm)))) );
+ break;
+ }
case 0x30C: // vspltisb (Splat Immediate Signed B, AV p247)
DIP("vspltisb v%d,%d\n", vD_addr, (Char)SIMM_8);
DIP(" => not implemented\n");
}
+/* --------- Operand, which can be a vector reg or a simm5. --------- */
+
+PPC32VI5s* PPC32VI5s_Imm ( Char simm5 ) {
+ PPC32VI5s* op = LibVEX_Alloc(sizeof(PPC32VI5s));
+ op->tag = Pvi_Imm;
+ op->Pvi.Imm5s = simm5;
+ vassert(simm5 >= -16 && simm5 <= 15);
+ return op;
+}
+PPC32VI5s* PPC32VI5s_Reg ( HReg reg ) {
+ PPC32VI5s* op = LibVEX_Alloc(sizeof(PPC32VI5s));
+ op->tag = Pvi_Reg;
+ op->Pvi.Reg = reg;
+ vassert(hregClass(reg) == HRcVec128);
+ return op;
+}
+
+void ppPPC32VI5s ( PPC32VI5s* src ) {
+ switch (src->tag) {
+ case Pvi_Imm:
+ vex_printf("%d", (Int)src->Pvi.Imm5s);
+ break;
+ case Pvi_Reg:
+ ppHRegPPC32(src->Pvi.Reg);
+ break;
+ default:
+ vpanic("ppPPC32VI5s");
+ }
+}
+
+/* A PPC32VI5s can only be used in a "read" context (what would it
+ mean to write or modify a literal?) and so we enumerate its
+ registers accordingly. */
+static void addRegUsage_PPC32VI5s ( HRegUsage* u, PPC32VI5s* op ) {
+   switch (op->tag) {
+   case Pvi_Imm:
+      return;
+   case Pvi_Reg:
+      addHRegUse(u, HRmRead, op->Pvi.Reg);
+      return;
+   default:
+      vpanic("addRegUsage_PPC32VI5s");
+   }
+}
+
+static void mapRegs_PPC32VI5s ( HRegRemap* m, PPC32VI5s* op ) {
+   switch (op->tag) {
+   case Pvi_Imm:
+      return;
+   case Pvi_Reg:
+      op->Pvi.Reg = lookupHRegRemap(m, op->Pvi.Reg);
+      return;
+   default:
+      vpanic("mapRegs_PPC32VI5s");
+   }
+}
+
+
/* --------- Instructions. --------- */
HChar* showPPC32UnaryOp ( PPC32UnaryOp op ) {
i->Pin.AvShlDbl.srcR = srcR;
return i;
}
-PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32RI* src ) {
+PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32VI5s* src ) {
PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr));
i->tag = Pin_AvSplat;
i->Pin.AvSplat.sz = sz;
return;
case Pin_AvSplat: {
- UChar ch_sz = toUChar(
- (i->Pin.AvSplat.sz == 8) ? 'b' :
- (i->Pin.AvSplat.sz == 16) ? 'h' : 'w'
- );
+ UChar sz = i->Pin.AvSplat.sz;
+ UChar ch_sz = toUChar( (sz == 8) ? 'b' : (sz == 16) ? 'h' : 'w' );
vex_printf("vsplt%s%c ",
- i->Pin.AvSplat.src->tag == Pri_Imm ? "is" : "", ch_sz);
+ i->Pin.AvSplat.src->tag == Pvi_Imm ? "is" : "", ch_sz);
ppHRegPPC32(i->Pin.AvSplat.dst);
vex_printf(",");
- if (i->Pin.AvSplat.src->tag == Pri_Imm) {
- vex_printf("%d", (Char)(i->Pin.AvSplat.src->Pri.Imm));
- } else {
- ppHRegPPC32(i->Pin.AvSplat.src->Pri.Reg);
- vex_printf(", 0");
- }
+ ppPPC32VI5s(i->Pin.AvSplat.src);
+ if (i->Pin.AvSplat.src->tag == Pvi_Reg)
+ vex_printf(", %u", (128/sz)-1); /* louis lane */
return;
}
addHRegUse(u, HRmRead, i->Pin.AvShlDbl.srcR);
return;
case Pin_AvSplat:
- addHRegUse(u, HRmWrite, i->Pin.AvSplat.dst);
- addRegUsage_PPC32RI(u, i->Pin.AvSplat.src);
+ addHRegUse(u, HRmWrite, i->Pin.AvSplat.dst);
+ addRegUsage_PPC32VI5s(u, i->Pin.AvSplat.src);
return;
case Pin_AvCMov:
addHRegUse(u, HRmModify, i->Pin.AvCMov.dst);
return;
case Pin_AvSplat:
mapReg(m, &i->Pin.AvSplat.dst);
- mapRegs_PPC32RI(m, i->Pin.AvSplat.src);
+ mapRegs_PPC32VI5s(m, i->Pin.AvSplat.src);
return;
case Pin_AvCMov:
mapReg(m, &i->Pin.AvCMov.dst);
UInt v_srcL = vregNo(i->Pin.AvBinary.srcL);
UInt v_srcR = vregNo(i->Pin.AvBinary.srcR);
UInt opc2;
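+      /* V128 shifts take two AltiVec ops: vs[lr]o shifts by whole
+         bytes, then vs[lr] shifts the remaining 0..7 bits; both read
+         their count from srcR. */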
+ if (i->Pin.AvBinary.op == Pav_SHL) {
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1036 ); // vslo
+ p = mkFormVX( p, 4, v_dst, v_dst, v_srcR, 452 ); // vsl
+ goto done;
+ }
+ if (i->Pin.AvBinary.op == Pav_SHR) {
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1100 ); // vsro
+ p = mkFormVX( p, 4, v_dst, v_dst, v_srcR, 708 ); // vsr
+ goto done;
+ }
switch (i->Pin.AvBinary.op) {
/* Bitwise */
case Pav_AND: opc2 = 1028; break; // vand
case Pav_OR: opc2 = 1156; break; // vor
case Pav_XOR: opc2 = 1220; break; // vxor
-
- /* Shift */
- case Pav_SHL: opc2 = 452; break; // vsl
- case Pav_SHR: opc2 = 708; break; // vsr
-
default:
goto bad;
}
case Pin_AvSplat: { // vsplt(is)(b,h,w)
UInt v_dst = vregNo(i->Pin.AvSplat.dst);
UChar sz = i->Pin.AvSplat.sz;
- UInt v_src, simm_src, opc2;
+ UInt v_src, opc2;
vassert(sz == 8 || sz == 16 || sz == 32);
- if (i->Pin.AvSplat.src->tag == Pri_Imm) {
+ if (i->Pin.AvSplat.src->tag == Pvi_Imm) {
opc2 = (sz == 8) ? 780 : (sz == 16) ? 844 : 908; // 8,16,32
- simm_src = i->Pin.AvSplat.src->Pri.Imm;
- p = mkFormVX( p, 4, v_dst, simm_src, 0, opc2 );
- } else { // Pri_Reg
+ /* expects 5-bit-signed-imm */
+ Char simm5 = i->Pin.AvSplat.src->Pvi.Imm5s;
+ vassert(simm5 >= -16 && simm5 <= 15);
+ p = mkFormVX( p, 4, v_dst, (UInt)simm5, 0, opc2 );
+ }
+      else { // Pvi_Reg
opc2 = (sz == 8) ? 524 : (sz == 16) ? 588 : 652; // 8,16,32
- v_src = iregNo(i->Pin.AvSplat.src->Pri.Reg);
- p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+ vassert(hregClass(i->Pin.AvSplat.src->Pvi.Reg) == HRcVec128);
+ v_src = vregNo(i->Pin.AvSplat.src->Pvi.Reg);
+ UInt lowest_lane = (128/sz)-1;
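+         /* (128/sz)-1 picks the rightmost, lowest-order lane */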
+ p = mkFormVX( p, 4, v_dst, lowest_lane, v_src, opc2 );
}
goto done;
}
extern void ppPPC32RI ( PPC32RI* );
+/* --------- Operand, which can be a vector reg or a simm5. --------- */
+/* ("VI5s" == "Vector reg or 5-bit Signed immediate") */
+typedef
+ enum {
+ Pvi_Imm=5,
+ Pvi_Reg=6
+ }
+ PPC32VI5sTag;
+
+typedef
+ struct {
+ PPC32VI5sTag tag;
+ union {
+ Char Imm5s;
+ HReg Reg;
+ }
+ Pvi;
+ }
+ PPC32VI5s;
+
+extern PPC32VI5s* PPC32VI5s_Imm ( Char );
+extern PPC32VI5s* PPC32VI5s_Reg ( HReg );
+
+extern void ppPPC32VI5s ( PPC32VI5s* );
+
+
/* --------- Instructions. --------- */
/* --------- */
struct {
UChar sz; /* 8,16,32 */
HReg dst;
- PPC32RI* src;
+ PPC32VI5s* src;
} AvSplat;
/* Mov src to dst on the given condition, which may not
be the bogus Xcc_ALWAYS. */
extern PPC32Instr* PPC32Instr_AvPerm ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
extern PPC32Instr* PPC32Instr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
extern PPC32Instr* PPC32Instr_AvShlDbl ( UChar shift, HReg dst, HReg srcL, HReg srcR );
-extern PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32RI* src );
+extern PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32VI5s* src );
extern PPC32Instr* PPC32Instr_AvCMov ( PPC32CondCode, HReg dst, HReg src );
extern PPC32Instr* PPC32Instr_AvLdVSCR ( HReg src );
//.. add_to_esp(env, 8);
//.. }
+/*
+  Generates code for AvSplat
+  - takes an IRExpr* of type Ity_I8, Ity_I16 or Ity_I32, and
+    returns a vector reg with that value duplicated across all lanes
+  - uses AvSplat(imm) for immediates representable as a signed
+    6-bit value; otherwise must store the reg and load the vector
+*/
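+/* e.g. a splat of 23 is emitted as splat(7) - splat(-16), and a
+   splat of -20 as splat(-4) + splat(-16) */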
+static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e )
+{
+ HReg dst = newVRegV(env);
+ PPC32RI* ri = iselIntExpr_RI(env, e);
+ IRType ty = typeOfIRExpr(env->type_env,e);
+   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
+   UInt sz = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
+
+ HReg r_src;
+ /* special case: immediate */
+ if (ri->tag == Pri_Imm) {
+ Int simm32 = (Int)ri->Pri.Imm;
+
+ /* figure out if it's do-able with imm splats. */
+ if (simm32 >= -32 && simm32 <= 31) {
+ Char simm6 = (Char)simm32;
+ if (simm6 > 15) { /* 16:31 inclusive */
+ HReg v1 = newVRegV(env);
+ HReg v2 = newVRegV(env);
+ addInstr(env, PPC32Instr_AvSplat(sz, v1, PPC32VI5s_Imm(-16)));
+ addInstr(env, PPC32Instr_AvSplat(sz, v2, PPC32VI5s_Imm(simm6-16)));
+ addInstr(env, PPC32Instr_AvBinary(Pav_SUBUM, dst, v2, v1));
+ return dst;
+ }
+ if (simm6 < -16) { /* -32:-17 inclusive */
+ HReg v1 = newVRegV(env);
+ HReg v2 = newVRegV(env);
+ addInstr(env, PPC32Instr_AvSplat(sz, v1, PPC32VI5s_Imm(-16)));
+ addInstr(env, PPC32Instr_AvSplat(sz, v2, PPC32VI5s_Imm(simm6+16)));
+ addInstr(env, PPC32Instr_AvBinary(Pav_ADDUM, dst, v2, v1));
+ return dst;
+ }
+ /* simplest form: -16:15 inclusive */
+ addInstr(env, PPC32Instr_AvSplat(sz, dst, PPC32VI5s_Imm(simm6)));
+ return dst;
+ }
+
+ /* no luck; use the Slow way. */
+ r_src = newVRegI(env);
+ addInstr(env, PPC32Instr_LI32(r_src, (UInt)simm32));
+ }
+ else {
+ r_src = ri->Pri.Reg;
+ }
+
+ /* default case: store r_src in lowest lane of 16-aligned mem,
+ load vector, splat lowest lane to dst */
+ {
+ /* CAB: Perhaps faster to store r_src multiple times (sz dependent),
+ and simply load the vector? */
+
+ HReg v_src = newVRegV(env);
+ PPC32AMode *am_off12;
+
+ sub_from_sp( env, 32 ); // Move SP down
+ /* Get a 16-aligned address within our stack space */
+ HReg r_aligned16 = get_sp_aligned16( env );
+ am_off12 = PPC32AMode_IR( 12, r_aligned16);
+
+ /* Store r_src in low word of 16-aligned mem */
+ addInstr(env, PPC32Instr_Store( 4, am_off12, r_src ));
+
+ /* Load src to vector[low lane] */
+ addInstr(env, PPC32Instr_AvLdSt( True/*load*/, 4, v_src, am_off12 ));
+ add_to_sp( env, 32 ); // Reset SP
+
+ /* Finally, splat v_src[low_lane] to dst */
+ addInstr(env, PPC32Instr_AvSplat(sz, dst, PPC32VI5s_Reg(v_src)));
+ return dst;
+ }
+}
+
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
static PPC32RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
IRType ty = typeOfIRExpr(env->type_env,e);
- vassert(ty == Ity_I32);
+ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
/* special case: immediate */
if (e->tag == Iex_Const) {
UInt u;
switch (e->Iex.Const.con->tag) {
- case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
+ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
+ case Ico_U16: u = 0xFFFF & e->Iex.Const.con->Ico.U16; break;
+ case Ico_U8: u = 0xFF & e->Iex.Const.con->Ico.U8; break;
default: vpanic("iselIntExpr_RI.Iex_Const(ppc32h)");
}
return PPC32RI_Imm(u);
//.. return dst;
//.. }
+      case Iop_Dup32x4:
+         return mk_AvDuplicateRI(env, e->Iex.Unop.arg);
+
default:
break;
} /* switch (e->Iex.Unop.op) */
do_AvBin: {
HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
- HReg dst = newVRegV(env);
+ HReg dst = newVRegV(env);
addInstr(env, PPC32Instr_AvBinary(op, dst, arg1, arg2));
return dst;
}
//.. case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
//.. case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
//.. case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
+
+ case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128;
+ do_AvShiftV128: {
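+      /* duplicate the shift amount into every lane, since the
+         emitted vslo/vsl (or vsro/vsr) pair reads its count from a
+         vector register */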
+      HReg dst    = newVRegV(env);
+      HReg v_src  = iselVecExpr(env, e->Iex.Binop.arg1);
+      HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+      addInstr(env, PPC32Instr_AvBinary(op, dst, v_src, v_shft));
+      return dst;
+ }
+
//.. do_SseShift: {
//.. HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
//.. X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);