From: Kewen Lin
Date: Thu, 13 Jul 2023 02:23:21 +0000 (-0500)
Subject: vect: Adjust vectorizable_load costing on VMAT_LOAD_STORE_LANES
X-Git-Tag: basepoints/gcc-15~7647
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c4a8f4414215be6bf381398fdb391afd1440c8f9;p=thirdparty%2Fgcc.git

vect: Adjust vectorizable_load costing on VMAT_LOAD_STORE_LANES

This patch adjusts the cost handling on VMAT_LOAD_STORE_LANES in
function vectorizable_load.  We don't call function
vect_model_load_cost for it any more.  The costing now done inline
follows what vect_model_load_cost used to do, so this shouldn't
cause any functional changes.

gcc/ChangeLog:

	* tree-vect-stmts.cc (vectorizable_load): Adjust the cost handling on
	VMAT_LOAD_STORE_LANES without calling vect_model_load_cost.
	(vect_model_load_cost): Remove VMAT_LOAD_STORE_LANES related handling
	and assert it will never get VMAT_LOAD_STORE_LANES.
---

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index cbf9e28ea3c3..033946909e6c 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1153,7 +1153,8 @@ vect_model_load_cost (vec_info *vinfo,
   gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
 	      && memory_access_type != VMAT_INVARIANT
 	      && memory_access_type != VMAT_ELEMENTWISE
-	      && memory_access_type != VMAT_STRIDED_SLP);
+	      && memory_access_type != VMAT_STRIDED_SLP
+	      && memory_access_type != VMAT_LOAD_STORE_LANES);
 
   unsigned int inside_cost = 0, prologue_cost = 0;
   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
@@ -1194,31 +1195,6 @@ vect_model_load_cost (vec_info *vinfo,
      once per group anyhow.  */
   bool first_stmt_p = (first_stmt_info == stmt_info);
 
-  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
-     ones we actually need.  Account for the cost of unused results.  */
-  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
-    {
-      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
-      stmt_vec_info next_stmt_info = first_stmt_info;
-      do
-	{
-	  gaps -= 1;
-	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
-	}
-      while (next_stmt_info);
-      if (gaps)
-	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_NOTE, vect_location,
-			     "vect_model_load_cost: %d unused vectors.\n",
-			     gaps);
-	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
-			      alignment_support_scheme, misalignment, false,
-			      &inside_cost, &prologue_cost,
-			      cost_vec, cost_vec, true);
-	}
-    }
-
   /* We assume that the cost of a single load-lanes instruction is
      equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
      access is instead being provided by a load-and-permute operation,
@@ -10358,7 +10334,7 @@
     }
   tree vec_mask = NULL_TREE;
   poly_uint64 group_elt = 0;
-  unsigned int inside_cost = 0;
+  unsigned int inside_cost = 0, prologue_cost = 0;
   for (j = 0; j < ncopies; j++)
     {
       /* 1. Create the vector or array pointer update chain.  */
@@ -10438,8 +10414,42 @@
 	dr_chain.create (vec_num);
 
       gimple *new_stmt = NULL;
-      if (memory_access_type == VMAT_LOAD_STORE_LANES && !costing_p)
+      if (memory_access_type == VMAT_LOAD_STORE_LANES)
 	{
+	  if (costing_p)
+	    {
+	      /* An IFN_LOAD_LANES will load all its vector results,
+		 regardless of which ones we actually need.  Account
+		 for the cost of unused results.  */
+	      if (grouped_load && first_stmt_info == stmt_info)
+		{
+		  unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
+		  stmt_vec_info next_stmt_info = first_stmt_info;
+		  do
+		    {
+		      gaps -= 1;
+		      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
+		    }
+		  while (next_stmt_info);
+		  if (gaps)
+		    {
+		      if (dump_enabled_p ())
+			dump_printf_loc (MSG_NOTE, vect_location,
+					 "vect_model_load_cost: %d "
+					 "unused vectors.\n",
+					 gaps);
+		      vect_get_load_cost (vinfo, stmt_info, gaps,
+					  alignment_support_scheme,
+					  misalignment, false, &inside_cost,
+					  &prologue_cost, cost_vec, cost_vec,
+					  true);
+		    }
+		}
+	      vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
+				  misalignment, false, &inside_cost,
+				  &prologue_cost, cost_vec, cost_vec, true);
+	      continue;
+	    }
 	  tree vec_array;
 	  vec_array = create_vector_array (vectype, vec_num);
 
@@ -11090,13 +11100,14 @@ vec_num_loop_costing_end:
 
   if (costing_p)
     {
-      if (memory_access_type == VMAT_GATHER_SCATTER)
+      if (memory_access_type == VMAT_GATHER_SCATTER
+	  || memory_access_type == VMAT_LOAD_STORE_LANES)
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_NOTE, vect_location,
 			     "vect_model_load_cost: inside_cost = %u, "
-			     "prologue_cost = 0 .\n",
-			     inside_cost);
+			     "prologue_cost = %u .\n",
+			     inside_cost, prologue_cost);
 	}
       else
 	vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
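
For context on the "unused results" accounting moved by this patch: an
IFN_LOAD_LANES always produces DR_GROUP_SIZE vector results, so the cost of
any dead group members must still be paid.  The standalone sketch below
(toy types and helper names such as stmt_info and count_unused_lanes are
illustrative stand-ins, not the GCC API) mirrors the DR_GROUP_NEXT_ELEMENT
walk in the hunk above to derive the "gaps" count:

/* Standalone sketch, not part of the patch: group size minus the number
   of live group members gives the vectors that are loaded but never
   used, which the costing still charges loads for.  */
#include <cassert>
#include <cstdio>

struct stmt_info
{
  stmt_info *next_element;  /* toy stand-in for DR_GROUP_NEXT_ELEMENT */
};

static unsigned
count_unused_lanes (unsigned group_size, stmt_info *first)
{
  unsigned gaps = group_size;
  /* Subtract one per live member, like the do/while in the patch.  */
  for (stmt_info *s = first; s; s = s->next_element)
    gaps -= 1;
  return gaps;
}

int
main ()
{
  /* A group of size 4 with only 3 live members leaves 1 unused vector.  */
  stmt_info c = { nullptr }, b = { &c }, a = { &b };
  unsigned gaps = count_unused_lanes (4, &a);
  assert (gaps == 1);
  printf ("unused vectors: %u\n", gaps);
  return 0;
}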