In the IR optimiser (ir_opt.c): recognise the following IROps as
dependency-breaking ops that generate an all-ones output when both operands
are the same expression: Iop_CmpEQ16x4, Iop_CmpEQ32x2, Iop_CmpEQ64x2,
Iop_CmpEQ8x32, Iop_CmpEQ16x16 and Iop_CmpEQ64x4. I think this fixes all the
known cases for sizes 32 bits to 256 bits. It also fixes bug 425820.
case Iop_Or32:
return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
case Iop_CmpEQ8x8:
+ case Iop_CmpEQ16x4:
+ case Iop_CmpEQ32x2:
case Iop_Or64:
return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
case Iop_CmpEQ8x16:
case Iop_CmpEQ16x8:
case Iop_CmpEQ32x4:
+ case Iop_CmpEQ64x2:
return IRExpr_Const(IRConst_V128(0xFFFF));
+ case Iop_CmpEQ8x32:
+ case Iop_CmpEQ16x16:
case Iop_CmpEQ32x8:
+ case Iop_CmpEQ64x4:
return IRExpr_Const(IRConst_V256(0xFFFFFFFF));
default:
ppIROp(op);
}
break;
+ // in total 32 bits
case Iop_CmpEQ32:
+ // in total 64 bits
case Iop_CmpEQ64:
case Iop_CmpEQ8x8:
+ case Iop_CmpEQ16x4:
+ case Iop_CmpEQ32x2:
+ // in total 128 bits
case Iop_CmpEQ8x16:
case Iop_CmpEQ16x8:
case Iop_CmpEQ32x4:
+ case Iop_CmpEQ64x2:
+ // in total 256 bits
+ case Iop_CmpEQ8x32:
+ case Iop_CmpEQ16x16:
case Iop_CmpEQ32x8:
+ case Iop_CmpEQ64x4:
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
break;