From: Richard Biener
Date: Mon, 11 Jan 2021 10:47:46 +0000 (+0100)
Subject: tree-optimization/98526 - fix vectorizer reduction cost
X-Git-Tag: releases/gcc-10.3.0~258
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4f0d8562af81709db39d7899993dd2bf98af28ec;p=thirdparty%2Fgcc.git

tree-optimization/98526 - fix vectorizer reduction cost

This fixes a double-counting in the reduction cost when vectorizing
the reduction through the regular vectorizable_* functions.

2021-01-11 Richard Biener

    PR tree-optimization/98526
    * tree-vect-loop.c (vect_model_reduction_cost): Remove costing
    of the actual reduction op for the regular case.
    (vectorizable_reduction): Cost the stmts
    vect_transform_reduction produces here.

(cherry picked from commit 04bff1bbfc11a974342c0eb0c0d65d902e36e82e)
---

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index bb048075b2f5..cfeeac5f84e2 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3964,8 +3964,8 @@ have_whole_vector_shift (machine_mode mode)
 /* Function vect_model_reduction_cost.
 
    Models cost for a reduction operation, including the vector ops
-   generated within the strip-mine loop, the initial definition before
-   the loop, and the epilogue code that must be generated. */
+   generated within the strip-mine loop in some cases, the initial
+   definition before the loop, and the epilogue code that must be generated. */
 
 static void
 vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
@@ -4028,10 +4028,6 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
           prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
                                              scalar_to_vec, stmt_info, 0,
                                              vect_prologue);
-
-      /* Cost of reduction op inside loop. */
-      inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
-                                      stmt_info, 0, vect_body);
     }
 
   /* Determine cost of epilogue code.
@@ -6775,6 +6771,15 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
   vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies,
                              cost_vec);
 
+  /* Cost the reduction op inside the loop if transformed via
+     vect_transform_reduction. Otherwise this is costed by the
+     separate vectorizable_* routines. */
+  if (single_defuse_cycle
+      || code == DOT_PROD_EXPR
+      || code == WIDEN_SUM_EXPR
+      || code == SAD_EXPR)
+    record_stmt_cost (cost_vec, ncopies, vector_stmt, stmt_info, 0, vect_body);
+
   if (dump_enabled_p () && reduction_type == FOLD_LEFT_REDUCTION)
     dump_printf_loc (MSG_NOTE, vect_location,