From: Jakub Jelinek Date: Mon, 20 Nov 2023 09:03:20 +0000 (+0100) Subject: tree-ssa-math-opts: popcount (X) == 1 to (X ^ (X - 1)) > (X - 1) optimization for... X-Git-Tag: basepoints/gcc-15~4526 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=103a3966bc7b68f91b6cd3c5beb330c4b8570c90;p=thirdparty%2Fgcc.git tree-ssa-math-opts: popcount (X) == 1 to (X ^ (X - 1)) > (X - 1) optimization for direct optab [PR90693] On Fri, Nov 17, 2023 at 03:01:04PM +0100, Jakub Jelinek wrote: > As a follow-up, I'm considering changing in this routine the popcount > call to IFN_POPCOUNT with 2 arguments and during expansion test costs. Here is the follow-up which does the rtx costs testing. 2023-11-20 Jakub Jelinek PR tree-optimization/90693 * tree-ssa-math-opts.cc (match_single_bit_test): Mark POPCOUNT with result only used in equality comparison against 1 with direct optab support as .POPCOUNT call with 2 arguments. * internal-fn.h (expand_POPCOUNT): Declare. * internal-fn.def (DEF_INTERNAL_INT_EXT_FN): New macro, document it, undefine at the end. (POPCOUNT): Use it instead of DEF_INTERNAL_INT_FN. * internal-fn.cc (DEF_INTERNAL_INT_EXT_FN): Define to nothing before inclusion to define expanders. (expand_POPCOUNT): New function. --- diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index cd46875696af..cce73b3b2e08 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4285,6 +4285,7 @@ set_edom_supported_p (void) { \ expand_##TYPE##_optab_fn (fn, stmt, OPTAB##_optab); \ } +#define DEF_INTERNAL_INT_EXT_FN(CODE, FLAGS, OPTAB, TYPE) #define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \ UNSIGNED_OPTAB, TYPE) \ static void \ @@ -5097,3 +5098,67 @@ expand_BITINTTOFLOAT (internal_fn, gcall *stmt) if (val != target) emit_move_insn (target, val); } + +void +expand_POPCOUNT (internal_fn fn, gcall *stmt) +{ + if (gimple_call_num_args (stmt) == 1) + { + expand_unary_optab_fn (fn, stmt, popcount_optab); + return; + } + /* If .POPCOUNT call has 2 arguments, match_single_bit_test marked it + because the result is only used in an equality comparison against 1. + Use rtx costs in that case to determine if .POPCOUNT (arg) == 1 + or (arg ^ (arg - 1)) > arg - 1 is cheaper. */ + bool speed_p = optimize_insn_for_speed_p (); + tree lhs = gimple_call_lhs (stmt); + tree arg = gimple_call_arg (stmt, 0); + tree type = TREE_TYPE (arg); + machine_mode mode = TYPE_MODE (type); + do_pending_stack_adjust (); + start_sequence (); + expand_unary_optab_fn (fn, stmt, popcount_optab); + rtx_insn *popcount_insns = get_insns (); + end_sequence (); + start_sequence (); + rtx plhs = expand_normal (lhs); + rtx pcmp = emit_store_flag (NULL_RTX, EQ, plhs, const1_rtx, mode, 0, 0); + if (pcmp == NULL_RTX) + { + fail: + end_sequence (); + emit_insn (popcount_insns); + return; + } + rtx_insn *popcount_cmp_insns = get_insns (); + end_sequence (); + start_sequence (); + rtx op0 = expand_normal (arg); + rtx argm1 = expand_simple_binop (mode, PLUS, op0, constm1_rtx, NULL_RTX, + 1, OPTAB_DIRECT); + if (argm1 == NULL_RTX) + goto fail; + rtx argxorargm1 = expand_simple_binop (mode, XOR, op0, argm1, NULL_RTX, + 1, OPTAB_DIRECT); + if (argxorargm1 == NULL_RTX) + goto fail; + rtx cmp = emit_store_flag (NULL_RTX, GTU, argxorargm1, argm1, mode, 1, 1); + if (cmp == NULL_RTX) + goto fail; + rtx_insn *cmp_insns = get_insns (); + end_sequence (); + unsigned popcount_cost = (seq_cost (popcount_insns, speed_p) + + seq_cost (popcount_cmp_insns, speed_p)); + unsigned cmp_cost = seq_cost (cmp_insns, speed_p); + if (popcount_cost <= cmp_cost) + emit_insn (popcount_insns); + else + { + emit_insn (cmp_insns); + plhs = expand_normal (lhs); + if (GET_MODE (cmp) != GET_MODE (plhs)) + cmp = convert_to_mode (GET_MODE (plhs), cmp, 1); + emit_move_insn (plhs, cmp); + } +} diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index d645075de53f..ba5e4ce272c0 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see DEF_INTERNAL_FLT_FN (NAME, FLAGS, OPTAB, TYPE) DEF_INTERNAL_FLT_FLOATN_FN (NAME, FLAGS, OPTAB, TYPE) DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE) + DEF_INTERNAL_INT_EXT_FN (NAME, FLAGS, OPTAB, TYPE) DEF_INTERNAL_COND_FN (NAME, FLAGS, OPTAB, TYPE) DEF_INTERNAL_SIGNED_COND_FN (NAME, FLAGS, OPTAB, TYPE) DEF_INTERNAL_WIDENING_OPTAB_FN (NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, @@ -100,6 +101,10 @@ along with GCC; see the file COPYING3. If not see says that the function extends the C-level BUILT_IN_{,L,LL,IMAX} group of functions to any integral mode (including vector modes). + DEF_INTERNAL_INT_EXT_FN is like DEF_INTERNAL_INT_FN, except that it + has expand_##NAME defined in internal-fn.cc to override the + DEF_INTERNAL_INT_FN expansion behavior. + DEF_INTERNAL_WIDENING_OPTAB_FN is a wrapper that defines five internal functions with DEF_INTERNAL_SIGNED_OPTAB_FN: - one that describes a widening operation with the same number of elements @@ -163,6 +168,11 @@ along with GCC; see the file COPYING3. If not see DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE) #endif +#ifndef DEF_INTERNAL_INT_EXT_FN +#define DEF_INTERNAL_INT_EXT_FN(NAME, FLAGS, OPTAB, TYPE) \ + DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE) +#endif + #ifndef DEF_INTERNAL_WIDENING_OPTAB_FN #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, TYPE) \ DEF_INTERNAL_SIGNED_OPTAB_FN (NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, TYPE) \ @@ -435,7 +445,7 @@ DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary) DEF_INTERNAL_INT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary) DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary) DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary) -DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary) +DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary) DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) @@ -578,6 +588,7 @@ DEF_INTERNAL_FN (BITINTTOFLOAT, ECF_PURE | ECF_LEAF, ". R . ") #undef DEF_INTERNAL_WIDENING_OPTAB_FN #undef DEF_INTERNAL_SIGNED_COND_FN #undef DEF_INTERNAL_COND_FN +#undef DEF_INTERNAL_INT_EXT_FN #undef DEF_INTERNAL_INT_FN #undef DEF_INTERNAL_FLT_FN #undef DEF_INTERNAL_FLT_FLOATN_FN diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 7d72f4db2d05..93598fac1c2d 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -262,6 +262,7 @@ extern void expand_MULBITINT (internal_fn, gcall *); extern void expand_DIVMODBITINT (internal_fn, gcall *); extern void expand_FLOATTOBITINT (internal_fn, gcall *); extern void expand_BITINTTOFLOAT (internal_fn, gcall *); +extern void expand_POPCOUNT (internal_fn, gcall *); extern bool vectorized_internal_fn_supported_p (internal_fn, tree); diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 9bfc2d3f68f0..bcc02aec6cbe 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -5195,7 +5195,16 @@ match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt) if (!INTEGRAL_TYPE_P (type)) return; if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH)) - return; + { + /* Tell expand_POPCOUNT the popcount result is only used in equality + comparison with one, so that it can decide based on rtx costs. */ + gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg, + integer_one_node); + gimple_call_set_lhs (g, gimple_call_lhs (call)); + gimple_stmt_iterator gsi2 = gsi_for_stmt (call); + gsi_replace (&gsi2, g, true); + return; + } tree argm1 = make_ssa_name (type); gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg, build_int_cst (type, -1));