}
-/* Masked load. */
-static ULong dis_VMASKMOV_load ( Bool *uses_vvvv, const VexAbiInfo* vbi,
- Prefix pfx, Long delta,
- const HChar* opname, Bool isYMM, IRType ty )
+/* Masked load or masked store. */
+static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ const HChar* opname, Bool isYMM, IRType ty,
+ Bool isLoad )
{
HChar dis_buf[50];
Int alen, i;
IRTemp addr;
UChar modrm = getUChar(delta);
UInt rG = gregOfRexRM(pfx,modrm);
UInt rV = getVexNvvvv(pfx);
- IRTemp res[8], cond;
+
addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
- if (isYMM) {
+ delta += alen;
+
+ /**/ if (isLoad && isYMM) {
DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
- } else {
+ }
+ else if (isLoad && !isYMM) {
DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
}
- delta += alen;
- for (i = 0; i < sizeof(res)/sizeof(res[0]); i++)
- res[i] = IRTemp_INVALID;
+ else if (!isLoad && isYMM) {
+ DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), nameYMMReg(rG), dis_buf );
+ }
+ else {
+ vassert(!isLoad && !isYMM);
+ DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), nameXMMReg(rG), dis_buf );
+ }
- for (i = 0; i < 2 * (isYMM ? 2 : 1) * (ty == Ity_I32 ? 2 : 1); i++) {
- res[i] = newTemp(ty);
- cond = newTemp(Ity_I1);
- assign( cond,
- binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
- ty == Ity_I32 ? getYMMRegLane32( rV, i )
- : getYMMRegLane64( rV, i ),
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ Bool laneIs32 = ty == Ity_I32;
+
+ Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);
+
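+ /* The mask bit for each lane is the lane's most significant bit,
+ hence the signed "< 0" compare below. */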
+ for (i = 0; i < nLanes; i++) {
+ IRTemp cond = newTemp(Ity_I1);
+ assign( cond,
+ binop(laneIs32 ? Iop_CmpLT32S : Iop_CmpLT64S,
+ (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i ),
mkU(ty, 0) ));
- stmt(
- IRStmt_LoadG(
- Iend_LE,
- ty == Ity_I32 ? ILGop_Ident32 : ILGop_Ident64,
- res[i],
- binop(Iop_Add64, mkexpr(addr), mkU64(i * (ty == Ity_I32 ? 4 : 8))),
- ty == Ity_I32 ? mkU32(0) : mkU64(0),
- mkexpr(cond)
- ));
- }
- switch (ty) {
- case Ity_I32:
- for (i = 0; i < 8; i++)
- putYMMRegLane32( rG, i, (i < 4 || isYMM)
- ? mkexpr(res[i]) : mkU32(0) );
- break;
- case Ity_I64:
- for (i = 0; i < 4; i++)
- putYMMRegLane64( rG, i, (i < 2 || isYMM)
- ? mkexpr(res[i]) : mkU64(0) );
- break;
- default: vassert(0);
+ IRTemp data = newTemp(ty);
+ IRExpr* ea = binop(Iop_Add64, mkexpr(addr),
+ mkU64(i * (laneIs32 ? 4 : 8)));
+ if (isLoad) {
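+ /* Guarded load: lanes whose mask bit is clear receive the
+ alternative value (zero) instead of being loaded. */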
+ stmt(
+ IRStmt_LoadG(
+ Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
+ data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
+ ));
+ (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
+ } else {
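+ /* Guarded store: read the lane from rG and write it to memory
+ only if the mask bit is set. */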
+ assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
+ stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
+ }
}
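+ /* Per VEX semantics, the 128-bit load forms zero the upper half
+ of the destination YMM register. */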
+ if (isLoad && !isYMM)
+ putYMMRegLane128( rG, 1, mkV128(0) );
+
*uses_vvvv = True;
return delta;
}
/* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2C /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
&& !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
- /*!isYMM*/False, Ity_I32 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
+ /*!isYMM*/False, Ity_I32, /*isLoad*/True );
goto decode_success;
}
/* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2C /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
&& !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
- /*isYMM*/True, Ity_I32 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
+ /*isYMM*/True, Ity_I32, /*isLoad*/True );
goto decode_success;
}
break;
/* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2D /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
&& !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
- /*!isYMM*/False, Ity_I64 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
+ /*!isYMM*/False, Ity_I64, /*isLoad*/True );
goto decode_success;
}
/* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2D /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
&& !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
- /*isYMM*/True, Ity_I64 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
+ /*isYMM*/True, Ity_I64, /*isLoad*/True );
goto decode_success;
}
break;
/* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
&& 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
- /*!isYMM*/False, Ity_I32 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
+ /*!isYMM*/False, Ity_I32, /*isLoad*/True );
goto decode_success;
}
/* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
&& 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
- /*isYMM*/True, Ity_I32 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
+ /*isYMM*/True, Ity_I32, /*isLoad*/True );
goto decode_success;
}
/* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
&& 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
- /*!isYMM*/False, Ity_I64 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
+ /*!isYMM*/False, Ity_I64, /*isLoad*/True );
goto decode_success;
}
/* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
&& 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
- delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
- /*isYMM*/True, Ity_I64 );
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
+ /*isYMM*/True, Ity_I64, /*isLoad*/True );
+ goto decode_success;
+ }
+ break;
+
+ case 0x8E:
+ /* VPMASKMOVD xmm2, xmm1, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
+ if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
+ && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
+ /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
+ goto decode_success;
+ }
+ /* VPMASKMOVD ymm2, ymm1, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
+ if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
+ && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
+ /*isYMM*/True, Ity_I32, /*!isLoad*/False );
+ goto decode_success;
+ }
+ /* VPMASKMOVQ xmm2, xmm1, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
+ if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
+ && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
+ /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
+ goto decode_success;
+ }
+ /* VPMASKMOVQ ymm2, ymm1, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
+ if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
+ && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
+ delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
+ /*isYMM*/True, Ity_I64, /*!isLoad*/False );
goto decode_success;
}
break;
i->Ain.CLoad.szB = szB;
i->Ain.CLoad.addr = addr;
i->Ain.CLoad.dst = dst;
- vassert(cond != Acc_ALWAYS);
+ vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
+ return i;
+}
+AMD64Instr* AMD64Instr_CStore ( AMD64CondCode cond, UChar szB,
+ HReg src, AMD64AMode* addr ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_CStore;
+ i->Ain.CStore.cond = cond;
+ i->Ain.CStore.szB = szB;
+ i->Ain.CStore.src = src;
+ i->Ain.CStore.addr = addr;
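+ /* Only the 32- and 64-bit variants are needed; these are the
+ VMASKMOV lane sizes. */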
+ vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
return i;
}
AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
case Ain_CLoad:
vex_printf("if (%%rflags.%s) { ",
showAMD64CondCode(i->Ain.CLoad.cond));
- vex_printf("mov%c (", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
+ vex_printf("mov%c ", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
ppAMD64AMode(i->Ain.CLoad.addr);
- vex_printf("), ");
+ vex_printf(", ");
(i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
(i->Ain.CLoad.dst);
vex_printf(" }");
return;
+ case Ain_CStore:
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.CStore.cond));
+ vex_printf("mov%c ", i->Ain.CStore.szB == 4 ? 'l' : 'q');
+ (i->Ain.CStore.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+ (i->Ain.CStore.src);
+ vex_printf(", ");
+ ppAMD64AMode(i->Ain.CStore.addr);
+ vex_printf(" }");
+ return;
+
case Ain_MovxLQ:
vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
return;
+ case Ain_CStore:
+ addRegUsage_AMD64AMode(u, i->Ain.CStore.addr);
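+ /* The source register is only read; whether the store actually
+ happens depends on the condition. */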
+ addHRegUse(u, HRmRead, i->Ain.CStore.src);
+ return;
case Ain_MovxLQ:
addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
mapReg(m, &i->Ain.CLoad.dst);
return;
+ case Ain_CStore:
+ mapRegs_AMD64AMode(m, i->Ain.CStore.addr);
+ mapReg(m, &i->Ain.CStore.src);
+ return;
case Ain_MovxLQ:
mapReg(m, &i->Ain.MovxLQ.src);
mapReg(m, &i->Ain.MovxLQ.dst);
goto done;
}
+ case Ain_CStore: {
+ /* AFAICS this is identical to Ain_CLoad except that the opcode
+ is 0x89 instead of 0x8B. */
+ vassert(i->Ain.CStore.cond != Acc_ALWAYS);
+
+ /* Only 32- or 64-bit variants are allowed. */
+ vassert(i->Ain.CStore.szB == 4 || i->Ain.CStore.szB == 8);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.CStore.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+ /* Now the store. */
+ rex = rexAMode_M(i->Ain.CStore.src, i->Ain.CStore.addr);
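+ /* Clear REX.W in the 4-byte case, giving a 32-bit rather than a
+ 64-bit store. */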
+ *p++ = i->Ain.CStore.szB == 4 ? clearWBit(rex) : rex;
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Ain.CStore.src, i->Ain.CStore.addr);
+
+ /* Fix up the conditional branch */
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ goto done;
+ }
+
case Ain_MovxLQ:
/* No, _don't_ ask me why the sense of the args has to be
different in the S vs Z case. I don't know. */