(match (clz_table_index @1 @2 @3)
(rshift (mult (msb_or_cascade_64 @1) INTEGER_CST@2) INTEGER_CST@3))
+/* Match count leading zeros for simplify_count_zeroes in forwprop,
+ variant that isolates the MSB as a power of two via (s - (s >> 1))
+ after the OR-cascade. For s = 2^(k+1) - 1 (the cascade output),
+ (s - (s >> 1)) equals 2^k, so the subsequent DeBruijn
+ multiply-and-shift is a CTZ-style lookup on the isolated MSB. The
+ table has to satisfy the direct CTZ DeBruijn property (validated by
+ the CTZ checkfn in simplify_count_zeroes).
+ Captures: @1 is the operand of the OR-cascade, @2 the constant
+ multiplier and @3 the constant outer shift count. @f names the
+ cascade result so that the same value is required on both sides of
+ the subtraction; @sub1 is the inner shift count, which the trailing
+ condition requires to be exactly 1.
+ PR tree-optimization/122569. */
+(match (clz_msb_iso_table_index @1 @2 @3)
+ (rshift (mult
+ (minus (msb_or_cascade_64@f @1) (rshift @f INTEGER_CST@sub1))
+ INTEGER_CST@2) INTEGER_CST@3)
+ (if (compare_tree_int (@sub1, 1) == 0)))
+
/* Floating point/integer comparison and integer->integer
or floating point -> floating point conversion. */
(match (cond_expr_convert_p @0 @2 @3 @6)
--- /dev/null
+/* PR tree-optimization/122569 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1-details" } */
+
+/* Test that the forwprop DeBruijn CLZ matcher recognizes the variant
+ idiom that isolates the MSB as a power of two via (s - (s >> 1))
+ after the OR-cascade. This pattern uses a CTZ-style DeBruijn magic
+ applied to 2^MSB, not to the all-bits-below value directly.
+ Reported on PR 122569 as a second reproducer.
+
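+ The table element type here is unsigned long rather than int. On
+ LP64 targets unsigned long is 64 bits wide, which exercises the
+ relaxed element-type check; on LLP64 targets it is 32 bits wide, so
+ the relaxed check is not exercised there. */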
+
+typedef unsigned long long uint64_t;
+/* Store in *RESULT the bit index of the most significant set bit of
+ VALUE (63 when VALUE is zero, because table index 0 holds 63).
+ Deliberately written as the OR-cascade / MSB-isolation /
+ DeBruijn-lookup idiom that the dg-final directive at the end of this
+ file expects forwprop1 to turn into a CLZ; do not restructure the
+ statements. */
+void
+get_msb_index (unsigned long *result, uint64_t value)
+{
+ static const unsigned long deBruijnTable64[64] = { /* Entry at index ((2^k * magic) >> 58) is k. */
+ 63, 0, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54,
+ 33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30,
+ 34, 11, 43, 14, 22, 4, 62, 57, 46, 52, 38, 26, 32,
+ 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31,
+ 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5
+ };
+
+ value |= value >> 1; /* OR-cascade: copy the highest set bit into every lower bit ... */
+ value |= value >> 2;
+ value |= value >> 4;
+ value |= value >> 8;
+ value |= value >> 16;
+ value |= value >> 32; /* ... so value is now 2^(k+1) - 1, k being the MSB index. */
+
+ *result = deBruijnTable64[((value - (value >> 1)) /* isolates 2^k */
+ * (uint64_t) 0x07EDD5E59A4E28C2ULL) >> 58]; /* top 6 bits of the product index the table */
+}
+
+/* { dg-final { scan-tree-dump "__builtin_clz|\\.CLZ" "forwprop1" { target { clzll && { lp64 || llp64 } } } } } */
/* Match.pd functions to match the ctz and clz table-index expressions;
   the msb_iso one matches the clz variant that first isolates the MSB
   as a power of two. */
extern bool gimple_ctz_table_index (tree, tree *, tree (*)(tree));
extern bool gimple_clz_table_index (tree, tree *, tree (*)(tree));
+extern bool gimple_clz_msb_iso_table_index (tree, tree *, tree (*)(tree));
/* Recognize count leading and trailing zeroes idioms.
The canonical form is array[((x & -x) * C) >> SHIFT] where C is a magic
gcc_checking_assert (TREE_CODE (array_ref) == ARRAY_REF);
internal_fn fn = IFN_LAST;
+ /* When true, the matched idiom is a CLZ using DeBruijn CTZ on the
+ isolated MSB -- see clz_msb_iso_table_index in match.pd. The
+ table stores MSB positions and must satisfy the direct CTZ
+ DeBruijn property, so we validate it with the CTZ checkfn even
+ though we emit IFN_CLZ code. */
+ bool clz_via_ctz = false;
/* For CTZ we recognize ((x & -x) * C) >> SHIFT where the array data
represents the number of trailing zeros. */
if (gimple_ctz_table_index (TREE_OPERAND (array_ref, 1), &res_ops[0], NULL))
else if (gimple_clz_table_index (TREE_OPERAND (array_ref, 1), &res_ops[0],
NULL))
fn = IFN_CLZ;
+ /* Variant CLZ idiom: after the OR-cascade sets all bits from 0 to
+ the original MSB, (value - (value >> 1)) isolates the MSB as a
+ power of two (2^k), and the subsequent DeBruijn multiply-and-shift
+ is a CTZ-style lookup on 2^k. The table stores MSB positions
+ directly. */
+ else if (gimple_clz_msb_iso_table_index (TREE_OPERAND (array_ref, 1),
+ &res_ops[0], NULL))
+ {
+ fn = IFN_CLZ;
+ clz_via_ctz = true;
+ }
else
return false;
tree input_type = TREE_TYPE (res_ops[0]);
unsigned input_bits = tree_to_shwi (TYPE_SIZE (input_type));
- /* Check the array element type is not wider than 32 bits and the input is
- an unsigned 32-bit or 64-bit type. */
- if (TYPE_PRECISION (type) > 32 || !TYPE_UNSIGNED (input_type))
+ /* Check the array element type is integral and not wider than 64 bits,
+ and the input is an unsigned 32-bit or 64-bit type. The table values
+ are bit positions in [0, input_bits - 1], so any integer element type
+ with at least 6 bits of precision suffices; the cap is just to keep
+ the transformation simple. */
+ if (!INTEGRAL_TYPE_P (type) || TYPE_PRECISION (type) > 64
+ || !TYPE_UNSIGNED (input_type))
return false;
if (input_bits != 32 && input_bits != 64)
return false;
if (!ctor)
return false;
unsigned HOST_WIDE_INT mulval = tree_to_uhwi (res_ops[1]);
- if (fn == IFN_CTZ)
+ /* CTZ and the MSB-isolation CLZ variant both use the direct CTZ
+ DeBruijn check (table[(magic << data) >> shift] == data). */
+ if (fn == IFN_CTZ || clz_via_ctz)
{
auto checkfn = [&](unsigned data, unsigned i) -> bool
{