static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
{
- /* Set Z=1 iff (vecE & vecG) == 0
- Set C=1 iff (vecE & not vecG) == 0
+ /* Set Z=1 iff (vecE & vecG) == 0--(128)--0
+ Set C=1 iff (vecE & not vecG) == 0--(128)--0
+
+ For the case `sign == 0`, be careful to use only IROps that can be
+ instrumented exactly by memcheck. This is because PTEST is used for
+ __builtin_strcmp in gcc14. See
+ https://bugzilla.redhat.com/show_bug.cgi?id=2257546
*/
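   /* Roughly why exactness matters (a sketch, not upstream wording):
      gcc14's inlined strcmp loads 16 bytes at a time, so the PTEST
      operands may contain uninitialised tail bytes even though the
      defined bytes already decide the comparison.  The removed
      "(x | -x) >>s (word-size - 1)" reduction makes the result depend
      on every input bit, so Memcheck would flag those tail bytes;
      CmpEQ64 against zero, by contrast, is tracked exactly. */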
/* andV, andnV: vecE & vecG, vecE and not(vecG) */
- /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
+ /* andV resp. andnV, are reduced to 64-bit values by or-ing the top
      and bottom 64-bits together.  It relies on this trick:
        InterleaveLO64x2([a,b],[c,d]) == [b,d], hence
        InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
        InterleaveHI64x2([a,b],[a,b]) == [a,a];
      or-ing those two results produces [a|b, a|b], from which we
      simply take the lower half. */
   IRTemp and64  = newTemp(Ity_I64);
   IRTemp andn64 = newTemp(Ity_I64);
   assign(and64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andV), mkexpr(andV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andV), mkexpr(andV)))));
   assign(andn64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andnV), mkexpr(andnV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andnV), mkexpr(andnV)))));
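   /* The OR-reduction is sound here because Z/C only ever depend on
      whether the relevant bits of the full 128-bit value are zero:
      (hi | lo) == 0 exactly when both halves are zero, and bits 63/31
      of the OR are the lane-wise ORs of the sign bits. */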
+ // Make z64 and c64 be either all-0s or all-1s
IRTemp z64 = newTemp(Ity_I64);
IRTemp c64 = newTemp(Ity_I64);
+
if (sign == 64) {
- /* When only interested in the most significant bit, just shift
- arithmetically right and negate. */
+ /* When only interested in the most significant bit, just copy bit 63
+ into all bit positions, then invert. */
assign(z64,
unop(Iop_Not64,
binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
assign(c64,
unop(Iop_Not64,
binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
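      /* Worked example: if bit 63 of and64 is set, Sar64 yields all-1s
         and Not64 yields all-0s, so the Z bit sliced out below is 0;
         if bit 63 is clear we get all-1s, hence Z=1. */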
- } else {
- if (sign == 32) {
- /* When interested in bit 31 and bit 63, mask those bits and
- fallthrough into the PTEST handling. */
- IRTemp t0 = newTemp(Ity_I64);
- IRTemp t1 = newTemp(Ity_I64);
- IRTemp t2 = newTemp(Ity_I64);
- assign(t0, mkU64(0x8000000080000000ULL));
- assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
- assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
- and64 = t1;
- andn64 = t2;
- }
- /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
- slice out the Z and C bits conveniently. We use the standard
- trick all-zeroes -> all-zeroes, anything-else -> all-ones
- done by "(x | -x) >>s (word-size - 1)".
- */
+ } else if (sign == 32) {
+      /* If we're interested in bits 63 and 31, OR bit 31 into bit 63,
+         copy bit 63 into all bit positions, then invert. */
+ IRTemp and3264 = newTemp(Ity_I64);
+ assign(and3264, binop(Iop_Or64, mkexpr(and64),
+ binop(Iop_Shl64, mkexpr(and64), mkU8(32))));
assign(z64,
unop(Iop_Not64,
- binop(Iop_Sar64,
- binop(Iop_Or64,
- binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
- mkexpr(and64)), mkU8(63))));
+ binop(Iop_Sar64, mkexpr(and3264), mkU8(63))));
+ IRTemp andn3264 = newTemp(Ity_I64);
+ assign(andn3264, binop(Iop_Or64, mkexpr(andn64),
+ binop(Iop_Shl64, mkexpr(andn64), mkU8(32))));
assign(c64,
unop(Iop_Not64,
- binop(Iop_Sar64,
- binop(Iop_Or64,
- binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
- mkexpr(andn64)), mkU8(63))));
+ binop(Iop_Sar64, mkexpr(andn3264), mkU8(63))));
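+      /* E.g. and64 = 0x0000000080000000: Shl64 by 32 moves bit 31 up
+         to bit 63, the Or64 then sets bit 63 of and3264, and the
+         Sar64/Not64 pair gives z64 = all-0s, i.e. Z=0, just as a set
+         sign bit in any 32-bit lane requires. */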
+ } else {
+ vassert(sign == 0);
+ assign(z64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(and64), mkU64(0)),
+ mkU64(~0ULL), mkU64(0ULL)));
+ assign(c64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(andn64), mkU64(0)),
+ mkU64(~0ULL), mkU64(0ULL)));
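+      /* CmpEQ64 is a comparison Memcheck can instrument exactly (via
+         its expensiveCmpEQorNE handling), and the ITE only selects
+         between two fully-defined constants, so no spurious
+         undefinedness is introduced on this path. */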
}
   /* And finally, slice out the Z and C flags and set the flags
      thunk to COPY for them. */
   IRTemp andV  = newTemp(Ity_V128);
   IRTemp andnV = newTemp(Ity_V128);
assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
assign(andnV, binop(Iop_AndV128,
- mkexpr(vecE),
- binop(Iop_XorV128, mkexpr(vecG),
- mkV128(0xFFFF))));
+ mkexpr(vecE), unop(Iop_NotV128, mkexpr(vecG))));
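+   /* (Xor with mkV128(0xFFFF), i.e. all-ones, is the same as NotV128;
+      using NotV128 directly lets the AndV128(x, NotV128(x)) ==> 0
+      fold in ir_opt, below, recognise the same-register PTEST case.) */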
finish_xTESTy ( andV, andnV, sign );
return delta;
break;
}
+ /* Similarly .. */
+ case Iop_V256toV128_0: case Iop_V256toV128_1: {
+ UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
+ if (v256 == 0x00000000) {
+ e2 = IRExpr_Const(IRConst_V128(0x0000));
+ } else {
+ goto unhandled;
+ }
+ break;
+ }
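+   /* Only the all-zeroes constant is handled since, plausibly, that is
+      what the AndV256(x, NotV256(x)) ==> 0 fold below feeds into these
+      ops; any other V256 constant is punted to `unhandled`. */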
+
case Iop_ZeroHI64ofV128: {
/* Could do better here -- only need to look at the bottom 64 bits
of the argument, really. */
}
/* -- V128 stuff -- */
+ case Iop_InterleaveLO64x2: // This, and the HI64, are created
+ case Iop_InterleaveHI64x2: // by the amd64 PTEST translation
case Iop_InterleaveLO8x16: {
/* This turns up a lot in Memcheck instrumentation of
Icc generated code. I don't know why. */
break;
}
}
+ /* AndV128( x, NotV128( x ) ) ==> 0...0 and
+ AndV256( x, NotV256( x ) ) ==> 0...0
+ This is generated by amd64 `ptest %xmmReg, %xmmReg`
+ (the same reg both times)
+ See https://bugzilla.redhat.com/show_bug.cgi?id=2257546 */
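+      /* Concretely, for `ptest %xmm1, %xmm1` the front end produces
+           andV  = AndV128(x, x)
+           andnV = AndV128(x, NotV128(x))
+         and this rule folds andnV to the zero constant, making the
+         C-flag computation constant-foldable and fully defined. */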
+ if (e->Iex.Binop.op == Iop_AndV128
+ || e->Iex.Binop.op == Iop_AndV256) {
+ Bool isV256 = e->Iex.Binop.op == Iop_AndV256;
+ IRExpr* x1 = chase(env, e->Iex.Binop.arg1);
+ IRExpr* rhs = chase(env, e->Iex.Binop.arg2);
+ if (x1 && rhs
+ && rhs->tag == Iex_Unop
+ && rhs->Iex.Unop.op == (isV256 ? Iop_NotV256
+ : Iop_NotV128)) {
+ IRExpr* x2 = chase(env, rhs->Iex.Unop.arg);
+ if (x2 && sameIRExprs(env, x1, x2)) {
+ e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
+ break;
+ }
+ }
+ }
break;
case Iop_OrV128: