From: Florian Krohm Date: Mon, 14 Jul 2025 16:32:06 +0000 (+0000) Subject: Add folding for Iop_PopCount32/64 and Iop_CmpNEZ16 (BZ 506211) X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ca10852231e27b430588c51c8d0d492e054f4f37;p=thirdparty%2Fvalgrind.git Add folding for Iop_PopCount32/64 and Iop_CmpNEZ16 (BZ 506211) Part of fixing https://bugs.kde.org/show_bug.cgi?id=506211 --- diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c index 52b8e0957..f0458cb28 100644 --- a/VEX/priv/ir_opt.c +++ b/VEX/priv/ir_opt.c @@ -1346,6 +1346,31 @@ static UInt fold_Clz32 ( UInt value ) return 0; } +/* Helpers for folding PopCount32/64. + https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan + As many iterations as 1-bits present. +*/ +static UInt fold_PopCount64 ( ULong value ) +{ + UInt count; + + for (count = 0; value != 0; ++count) { + value &= value - 1; // clear the least significant 1-bit + } + return count; +} + +static UInt fold_PopCount32 ( UInt value ) +{ + UInt count; + + for (count = 0; value != 0; ++count) { + value &= value - 1; // clear the least significant 1-bit + } + return count; +} + + /* V64 holds 8 summary-constant bits in V128/V256 style. Convert to the corresponding real constant. */ //XXX re-check this before use @@ -1604,6 +1629,12 @@ static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e ) (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8) ))); break; + case Iop_CmpNEZ16: + e2 = IRExpr_Const(IRConst_U1(toBool( + 0 != + (0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16) + ))); + break; case Iop_CmpNEZ32: e2 = IRExpr_Const(IRConst_U1(toBool( 0 != @@ -1678,6 +1709,17 @@ static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e ) break; } + case Iop_PopCount32: { + UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; + e2 = IRExpr_Const(IRConst_U32(fold_PopCount32(u32))); + break; + } + case Iop_PopCount64: { + ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; + e2 = IRExpr_Const(IRConst_U64(fold_PopCount64(u64))); + break; + } + /* For these vector ones, can't fold all cases, but at least do the most obvious one. Could do better here using summarise/desummarise of vector constants, but too diff --git a/none/tests/iropt-test/irops.tab b/none/tests/iropt-test/irops.tab index 45b0e728c..e73ec46a8 100644 --- a/none/tests/iropt-test/irops.tab +++ b/none/tests/iropt-test/irops.tab @@ -80,7 +80,7 @@ // { OPNAME(128HIto64), Ity_I64, 1, Ity_I128, }, // 128 bit { OPNAME(CmpNEZ8), Ity_I1, 1, Ity_I8 }, -// { OPNAME(CmpNEZ16), Ity_I1, 1, Ity_I16 }, // no folding yet + { OPNAME(CmpNEZ16), Ity_I1, 1, Ity_I16 }, { OPNAME(CmpNEZ32), Ity_I1, 1, Ity_I32 }, { OPNAME(CmpNEZ64), Ity_I1, 1, Ity_I64 }, @@ -104,8 +104,8 @@ // { OPNAME(CtzNat32), Ity_I32, 1, Ity_I32 }, // no folding yet // { OPNAME(CtzNat64), Ity_I64, 1, Ity_I64 }, // no folding yet -// { OPNAME(PopCount32), Ity_I32, 1, Ity_I32 }, // no folding yet -// { OPNAME(PopCount64), Ity_I64, 1, Ity_I64 }, // no folding yet + { OPNAME(PopCount32), Ity_I32, 1, Ity_I32 }, + { OPNAME(PopCount64), Ity_I64, 1, Ity_I64 }, diff --git a/none/tests/iropt-test/unary.c b/none/tests/iropt-test/unary.c index 537c29a72..44af3203d 100644 --- a/none/tests/iropt-test/unary.c +++ b/none/tests/iropt-test/unary.c @@ -29,6 +29,7 @@ static void check_result(const irop_t *, const test_data_t *); static void run_tests(const irop_t *, test_data_t *, unsigned, uint64_t *); static uint64_t left(uint64_t, unsigned); +static uint32_t popcount(uint64_t); void @@ -181,7 +182,7 @@ check_result(const irop_t *op, const test_data_t *data) case Iop_64HIto32: expected = opnd >> 32; break; case Iop_CmpNEZ8: -// case Iop_CmpNEZ16: + case Iop_CmpNEZ16: case Iop_CmpNEZ32: case Iop_CmpNEZ64: expected = opnd != 0; @@ -195,6 +196,11 @@ check_result(const irop_t *op, const test_data_t *data) case Iop_Left32: expected = left(opnd, 32); break; case Iop_Left64: expected = left(opnd, 64); break; + case Iop_PopCount32: + case Iop_PopCount64: + expected = popcount(opnd); + break; + default: panic("%s: operator %s not handled\n", __func__, op->name); } @@ -250,3 +256,16 @@ left(uint64_t val, unsigned width) panic(__func__); } } + + +/* Naive implementation of counting 1-bits */ +static uint32_t +popcount(uint64_t value) +{ + uint32_t count; + + for (count = 0; value != 0; value >>= 1) { + count += value & 1; + } + return count; +}