From 140a29ae847aa2e0a4aa1ec4d3066454f6db5e9c Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 18 Sep 2025 11:32:09 +0200 Subject: [PATCH] Remove SLP_TREE_NUMBER_OF_VEC_STMTS The following removes the redundant SLP_TREE_NUMBER_OF_VEC_STMTS, replacing it with vect_get_num_copies. It was previously ensured that all setters adhere to this invariant. * tree-vectorizer.h (_slp_tree::vec_stmts_size): Remove. (SLP_TREE_NUMBER_OF_VEC_STMTS): Likewise. * tree-vect-loop.cc (vectorizable_reduction): Adjust. (vect_transform_cycle_phi): Likewise. (vectorizable_phi): Likewise. (vectorizable_recurr): Likewise. (vectorizable_induction): Likewise. (vectorizable_live_operation): Likewise. * tree-vect-slp.cc (_slp_tree::_slp_tree): Do not set SLP_TREE_NUMBER_OF_VEC_STMTS. (vect_slp_analyze_node_operations_1): Likewise. (vect_slp_analyze_node_operations): Likewise. (vect_prologue_cost_for_slp): Adjust. (vect_create_constant_vectors): Likewise. (vect_get_slp_defs): Likewise. (vect_transform_slp_perm_load_1): Likewise. (vectorizable_slp_permutation_1): Likewise. (vect_schedule_slp_node): Likewise. (vectorize_slp_instance_root_stmt): Likewise. * tree-vect-stmts.cc (vect_model_simple_cost): Likewise. (vectorizable_bswap): Likewise. (vectorizable_call): Likewise. (vectorizable_conversion): Likewise. (vectorizable_shift): Likewise. (vectorizable_operation): Likewise. (vectorizable_store): Likewise. (vectorizable_load): Likewise. (vectorizable_condition): Likewise. (vectorizable_early_exit): Likewise. 
--- gcc/tree-vect-loop.cc | 19 +++++------ gcc/tree-vect-slp.cc | 51 ++++++++++------------------ gcc/tree-vect-stmts.cc | 76 ++++++++++++++++++++---------------------- gcc/tree-vectorizer.h | 6 ---- 4 files changed, 63 insertions(+), 89 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index b58e4355e58..5bed19f10f8 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -7374,7 +7374,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (STMT_VINFO_LIVE_P (phi_info)) return false; - ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + ncopies = vect_get_num_copies (loop_vinfo, slp_node); gcc_assert (ncopies >= 1); @@ -8247,7 +8247,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, /* Leave the scalar phi in place. */ return true; - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + vec_num = vect_get_num_copies (loop_vinfo, slp_node); /* Check whether we should use a single PHI node and accumulate vectors to one before the backedge. */ @@ -8502,7 +8502,7 @@ vect_transform_lc_phi (loop_vec_info loop_vinfo, /* Vectorizes PHIs. */ bool -vectorizable_phi (bb_vec_info, +vectorizable_phi (bb_vec_info vinfo, stmt_vec_info stmt_info, slp_tree slp_node, stmt_vector_for_cost *cost_vec) { @@ -8553,7 +8553,7 @@ vectorizable_phi (bb_vec_info, for the scalar and the vector PHIs. This avoids artificially favoring the vector path (but may pessimize it in some cases). 
*/ if (gimple_phi_num_args (as_a (stmt_info->stmt)) > 1) - record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), + record_stmt_cost (cost_vec, vect_get_num_copies (vinfo, slp_node), vector_stmt, slp_node, vectype, 0, vect_body); SLP_TREE_TYPE (slp_node) = phi_info_type; return true; @@ -8657,7 +8657,7 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, return false; tree vectype = SLP_TREE_VECTYPE (slp_node); - unsigned ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + unsigned ncopies = vect_get_num_copies (loop_vinfo, slp_node); poly_int64 nunits = TYPE_VECTOR_SUBPARTS (vectype); unsigned dist = SLP_TREE_LANES (slp_node); /* We need to be able to make progress with a single vector. */ @@ -9566,6 +9566,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, } } + unsigned nvects = vect_get_num_copies (loop_vinfo, slp_node); if (cost_vec) /* transformation not required. */ { unsigned inside_cost = 0, prologue_cost = 0; @@ -9584,8 +9585,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, return false; } /* loop cost for vec_loop. */ - inside_cost = record_stmt_cost (cost_vec, - SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), + inside_cost = record_stmt_cost (cost_vec, nvects, vector_stmt, slp_node, 0, vect_body); /* prologue cost for vec_init (if not nested) and step. */ prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop, @@ -9645,7 +9645,6 @@ vectorizable_induction (loop_vec_info loop_vinfo, } /* Now generate the IVs. */ - unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); gcc_assert (multiple_p (nunits * nvects, group_size)); unsigned nivs; unsigned HOST_WIDE_INT const_nunits; @@ -10195,7 +10194,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, all the slp vectors. Calculate which slp vector it is and the index within. 
*/ int num_scalar = SLP_TREE_LANES (slp_node); - int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + int num_vec = vect_get_num_copies (vinfo, slp_node); poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index; /* Calculate which vector contains the result, and which lane of @@ -10223,7 +10222,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, "the loop.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } - else if (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1) + else if (num_vec > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 895fb88ab7f..f553e8fba19 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -112,7 +112,6 @@ _slp_tree::_slp_tree () SLP_TREE_SCALAR_STMTS (this) = vNULL; SLP_TREE_SCALAR_OPS (this) = vNULL; SLP_TREE_VEC_DEFS (this) = vNULL; - SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0; SLP_TREE_CHILDREN (this) = vNULL; SLP_TREE_LOAD_PERMUTATION (this) = vNULL; SLP_TREE_LANE_PERMUTATION (this) = vNULL; @@ -8042,17 +8041,6 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, slp_instance node_instance, stmt_vector_for_cost *cost_vec) { - /* Calculate the number of vector statements to be created for the scalar - stmts in this node. It is the number of scalar elements in one scalar - iteration (DR_GROUP_SIZE) multiplied by VF divided by the number of - elements in a vector. For single-defuse-cycle, lane-reducing op, and - PHI statement that starts reduction comprised of only lane-reducing ops, - the number is more than effective vector statements actually required. */ - if (SLP_TREE_VECTYPE (node)) - SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node); - else - SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; - /* Handle purely internal nodes. */ if (SLP_TREE_PERMUTE_P (node)) { @@ -8220,7 +8208,7 @@ vect_scalar_ops_slice_hash::equal (const value_type &s1, by NODE. 
*/ static void -vect_prologue_cost_for_slp (slp_tree node, +vect_prologue_cost_for_slp (vec_info *vinfo, slp_tree node, stmt_vector_for_cost *cost_vec) { /* There's a special case of an existing vector, that costs nothing. */ @@ -8234,14 +8222,15 @@ vect_prologue_cost_for_slp (slp_tree node, unsigned group_size = SLP_TREE_SCALAR_OPS (node).length (); unsigned HOST_WIDE_INT const_nunits; unsigned nelt_limit; + unsigned nvectors = vect_get_num_copies (vinfo, node); auto ops = &SLP_TREE_SCALAR_OPS (node); - auto_vec starts (SLP_TREE_NUMBER_OF_VEC_STMTS (node)); + auto_vec starts (nvectors); if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits) && ! multiple_p (const_nunits, group_size)) { nelt_limit = const_nunits; hash_set vector_ops; - for (unsigned int i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); ++i) + for (unsigned int i = 0; i < nvectors; ++i) if (!vector_ops.add ({ ops, i * nelt_limit, nelt_limit })) starts.quick_push (i * nelt_limit); } @@ -8395,10 +8384,8 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, continue; } - SLP_TREE_NUMBER_OF_VEC_STMTS (child) - = vect_get_num_copies (vinfo, child); /* And cost them. */ - vect_prologue_cost_for_slp (child, cost_vec); + vect_prologue_cost_for_slp (vinfo, child, cost_vec); } /* If this node or any of its children can't be vectorized, try pruning @@ -10337,7 +10324,7 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node) /* We always want SLP_TREE_VECTYPE (op_node) here correctly set. 
*/ vector_type = SLP_TREE_VECTYPE (op_node); - unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (op_node); + unsigned int number_of_vectors = vect_get_num_copies (vinfo, op_node); SLP_TREE_VEC_DEFS (op_node).create (number_of_vectors); auto_vec voprnds (number_of_vectors); @@ -10562,7 +10549,7 @@ vect_get_slp_vect_def (slp_tree slp_node, unsigned i) void vect_get_slp_defs (slp_tree slp_node, vec *vec_defs) { - vec_defs->create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)); + vec_defs->create (SLP_TREE_VEC_DEFS (slp_node).length ()); vec_defs->splice (SLP_TREE_VEC_DEFS (slp_node)); } @@ -10616,7 +10603,7 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node, mode = TYPE_MODE (vectype); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); - unsigned int nstmts = SLP_TREE_NUMBER_OF_VEC_STMTS (node); + unsigned int nstmts = vect_get_num_copies (vinfo, node); /* Initialize the vect stmts of NODE to properly insert the generated stmts later. */ @@ -10816,7 +10803,7 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node, if (n_loads) { if (repeating_p) - *n_loads = SLP_TREE_NUMBER_OF_VEC_STMTS (node); + *n_loads = nstmts; else { /* Enforced above when !repeating_p. 
*/ @@ -11065,7 +11052,8 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi, unsigned vec_idx = (SLP_TREE_LANE_PERMUTATION (node)[0].second / SLP_TREE_LANES (node)); unsigned vec_num = SLP_TREE_LANES (child) / SLP_TREE_LANES (node); - for (unsigned i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); ++i) + unsigned nvectors = vect_get_num_copies (vinfo, node); + for (unsigned i = 0; i < nvectors; ++i) { tree def = SLP_TREE_VEC_DEFS (child)[i * vec_num + vec_idx]; node->push_vec_def (def); @@ -11406,14 +11394,11 @@ vect_schedule_slp_node (vec_info *vinfo, return; } - gcc_assert (SLP_TREE_VEC_DEFS (node).is_empty ()); - stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node); - gcc_assert (!SLP_TREE_VECTYPE (node) - || SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0); - if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0) - SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node)); + gcc_assert (SLP_TREE_VEC_DEFS (node).is_empty ()); + if (SLP_TREE_VECTYPE (node)) + SLP_TREE_VEC_DEFS (node).create (vect_get_num_copies (vinfo, node)); if (!SLP_TREE_PERMUTE_P (node) && STMT_VINFO_DATA_REF (stmt_info)) { @@ -11675,7 +11660,7 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance i if (instance->kind == slp_inst_kind_ctor) { - if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1) + if (SLP_TREE_VEC_DEFS (node).length () == 1) { tree vect_lhs = SLP_TREE_VEC_DEFS (node)[0]; tree root_lhs = gimple_get_lhs (instance->root_stmts[0]->stmt); @@ -11685,13 +11670,13 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance i vect_lhs); rstmt = gimple_build_assign (root_lhs, vect_lhs); } - else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1) + else { - int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node); + gcc_assert (SLP_TREE_VEC_DEFS (node).length () > 1); tree child_def; int j; vec *v; - vec_alloc (v, nelts); + vec_alloc (v, SLP_TREE_VEC_DEFS (node).length ()); /* A CTOR can handle V16HI composition from VNx8HI so we do 
not need to convert vector elements if the types diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 6274956e2a5..07291dfea41 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -916,7 +916,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) of the same KIND. */ static void -vect_model_simple_cost (vec_info *, int n, slp_tree node, +vect_model_simple_cost (vec_info *vinfo, int n, slp_tree node, stmt_vector_for_cost *cost_vec, vect_cost_for_stmt kind = vector_stmt) { @@ -924,7 +924,7 @@ vect_model_simple_cost (vec_info *, int n, slp_tree node, gcc_assert (cost_vec != NULL); - n *= SLP_TREE_NUMBER_OF_VEC_STMTS (node); + n *= vect_get_num_copies (vinfo, node); /* Pass the inside-of-loop statements to the target-specific cost model. */ inside_cost += record_stmt_cost (cost_vec, n, kind, node, 0, vect_body); @@ -3120,7 +3120,7 @@ vectorizable_bswap (vec_info *vinfo, record_stmt_cost (cost_vec, 1, vector_stmt, slp_node, 0, vect_prologue); record_stmt_cost (cost_vec, - SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), + vect_get_num_copies (vinfo, slp_node), vec_perm, slp_node, 0, vect_body); return true; } @@ -3438,6 +3438,7 @@ vectorizable_call (vec_info *vinfo, int len_opno = internal_fn_len_index (cond_len_fn); vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL); + unsigned int nvectors = vect_get_num_copies (vinfo, slp_node); if (cost_vec) /* transformation not required. */ { for (i = 0; i < nargs; ++i) @@ -3474,7 +3475,6 @@ vectorizable_call (vec_info *vinfo, } else { - unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); tree scalar_mask = NULL_TREE; if (mask_opno >= 0) scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno); @@ -3531,7 +3531,7 @@ vectorizable_call (vec_info *vinfo, /* Build argument list for the vectorized call. 
*/ if (cfn == CFN_GOMP_SIMD_LANE) { - for (i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++i) + for (i = 0; i < nvectors; ++i) { /* ??? For multi-lane SLP we'd need to build { 0, 0, .., 1, 1, ... }. */ @@ -5390,6 +5390,7 @@ vectorizable_conversion (vec_info *vinfo, return false; } DUMP_VECT_SCOPE ("vectorizable_conversion"); + unsigned int nvectors = vect_get_num_copies (vinfo, slp_node); if (modifier == NONE) { SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type; @@ -5400,7 +5401,6 @@ vectorizable_conversion (vec_info *vinfo, { SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type; /* The final packing step produces one vector result per copy. */ - unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); vect_model_promotion_demotion_cost (slp_node, nvectors, multi_step_cvt, cost_vec, widen_arith); @@ -5411,9 +5411,8 @@ vectorizable_conversion (vec_info *vinfo, /* The initial unpacking step produces two vector results per copy. MULTI_STEP_CVT is 0 for a single conversion, so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */ - unsigned int nvectors - = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt; - vect_model_promotion_demotion_cost (slp_node, nvectors, + vect_model_promotion_demotion_cost (slp_node, + nvectors >> multi_step_cvt, multi_step_cvt, cost_vec, widen_arith); } @@ -5943,7 +5942,7 @@ vectorizable_shift (vec_info *vinfo, scalar shift operand but code-generation below simply always takes the first. */ if (dt[1] == vect_internal_def - && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), + && maybe_ne (nunits_out * vect_get_num_copies (vinfo, slp_node), stmts.length ())) scalar_shift_arg = false; @@ -6102,6 +6101,7 @@ vectorizable_shift (vec_info *vinfo, /* Handle def. 
*/ vec_dest = vect_create_destination_var (scalar_dest, vectype); + unsigned nvectors = vect_get_num_copies (vinfo, slp_node); if (scalar_shift_arg && dt[1] != vect_internal_def) { /* Vector shl and shr insn patterns can be defined with scalar @@ -6115,15 +6115,14 @@ vectorizable_shift (vec_info *vinfo, dump_printf_loc (MSG_NOTE, vect_location, "operand 1 using scalar mode.\n"); vec_oprnd1 = op1; - vec_oprnds1.create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)); + vec_oprnds1.create (nvectors); vec_oprnds1.quick_push (vec_oprnd1); - /* Store vec_oprnd1 for every vector stmt to be created. - We check during the analysis that all the shift arguments - are the same. - TODO: Allow different constants for different vector - stmts generated for an SLP instance. */ - for (k = 0; - k < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) - 1; k++) + /* Store vec_oprnd1 for every vector stmt to be created. + We check during the analysis that all the shift arguments + are the same. + TODO: Allow different constants for different vector + stmts generated for an SLP instance. */ + for (k = 0; k < nvectors - 1; k++) vec_oprnds1.quick_push (vec_oprnd1); } } @@ -6141,8 +6140,8 @@ vectorizable_shift (vec_info *vinfo, gsi); vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype, gsi); - vec_oprnds1.create (slp_node->vec_stmts_size); - for (k = 0; k < slp_node->vec_stmts_size; k++) + vec_oprnds1.create (nvectors); + for (k = 0; k < nvectors; k++) vec_oprnds1.quick_push (vec_oprnd1); } else if (dt[1] == vect_constant_def) @@ -6393,7 +6392,7 @@ vectorizable_operation (vec_info *vinfo, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. */ - auto vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + auto vec_num = vect_get_num_copies (vinfo, slp_node); /* Reject attempts to combine mask types with nonmask types, e.g. 
if we have an AND between a (nonmask) boolean loaded from memory and @@ -6515,7 +6514,7 @@ vectorizable_operation (vec_info *vinfo, in the prologue and (mis-)costs one of the stmts as vector stmt. See below for the actual lowering that will be applied. */ - unsigned n = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + unsigned n = vect_get_num_copies (vinfo, slp_node); switch (code) { case PLUS_EXPR: @@ -7855,11 +7854,12 @@ vectorizable_store (vec_info *vinfo, } else vf = 1; + vec_num = vect_get_num_copies (vinfo, slp_node); /* FORNOW. This restriction should be relaxed. */ if (loop && nested_in_vect_loop_p (loop, stmt_info) - && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1) + && vec_num > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7949,7 +7949,7 @@ vectorizable_store (vec_info *vinfo, { first_stmt_info = stmt_info; first_dr_info = dr_info; - group_size = vec_num = 1; + group_size = 1; } if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec) @@ -8026,9 +8026,6 @@ vectorizable_store (vec_info *vinfo, || !nested_in_vect_loop_p (loop, stmt_info)); grouped_store = false; - /* VEC_NUM is the number of vect stmts to be created for this - group. */ - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info) || (DR_GROUP_FIRST_ELEMENT (first_stmt_info) == first_stmt_info)); @@ -8201,7 +8198,7 @@ vectorizable_store (vec_info *vinfo, if (nstores > 1) align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align); ltype = build_aligned_type (ltype, align * BITS_PER_UNIT); - int ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + int ncopies = vec_num; if (!costing_p) { @@ -8412,7 +8409,7 @@ vectorizable_store (vec_info *vinfo, /* For costing some adjacent vector stores, we'd like to cost with the total number of them once instead of cost each one by one. 
*/ unsigned int n_adjacent_stores = 0; - int ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) / group_size; + int ncopies = vec_num / group_size; for (j = 0; j < ncopies; j++) { if (j == 0) @@ -9398,9 +9395,10 @@ vectorizable_load (vec_info *vinfo, else vf = 1; + vec_num = vect_get_num_copies (vinfo, slp_node); + /* FORNOW. This restriction should be relaxed. */ - if (nested_in_vect_loop - && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1) + if (nested_in_vect_loop && vec_num > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -9701,7 +9699,7 @@ vectorizable_load (vec_info *vinfo, vectype, &gsi2); } gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); - for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j) + for (j = 0; j < (int) vec_num; ++j) slp_node->push_vec_def (new_stmt); return true; } @@ -9896,7 +9894,7 @@ vectorizable_load (vec_info *vinfo, dr_chain.create (ncopies); } else - ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + ncopies = vec_num; unsigned int group_el = 0; unsigned HOST_WIDE_INT @@ -10071,7 +10069,7 @@ vectorizable_load (vec_info *vinfo, /* We do not support grouped accesses in a nested loop, instead the access is contiguous but it might be permuted. No gap adjustment is needed though. 
*/ - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + ; else if (slp_perm && (group_size != scalar_lanes || !multiple_p (nunits, group_size))) @@ -10085,7 +10083,6 @@ vectorizable_load (vec_info *vinfo, } else { - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); group_gap_adj = group_size - scalar_lanes; } @@ -10095,10 +10092,9 @@ vectorizable_load (vec_info *vinfo, { first_stmt_info = stmt_info; first_dr_info = dr_info; - group_size = vec_num = 1; + group_size = 1; group_gap_adj = 0; ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); } vec_loop_masks *loop_masks @@ -10267,7 +10263,7 @@ vectorizable_load (vec_info *vinfo, /* For costing some adjacent vector loads, we'd like to cost with the total number of them once instead of cost each one by one. */ unsigned int n_adjacent_loads = 0; - int ncopies = slp_node->vec_stmts_size / group_size; + int ncopies = vec_num / group_size; for (j = 0; j < ncopies; j++) { if (costing_p) @@ -11600,7 +11596,7 @@ vectorizable_condition (vec_info *vinfo, tree vectype = SLP_TREE_VECTYPE (slp_node); tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; - int vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + int vec_num = vect_get_num_copies (vinfo, slp_node); cond_expr = gimple_assign_rhs1 (stmt); gcc_assert (! COMPARISON_CLASS_P (cond_expr)); @@ -12332,7 +12328,7 @@ vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, return false; machine_mode mode = TYPE_MODE (vectype); - int vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + int vec_num = vect_get_num_copies (loop_vinfo, slp_node); vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index b7c2188ab3d..d6ff23252d8 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -330,11 +330,6 @@ struct _slp_tree { tree vectype; /* Vectorized defs. 
*/ vec vec_defs; - /* Number of vector stmts that are created to replace the group of scalar - stmts. It is calculated during the transformation phase as the number of - scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF - divided by vector size. */ - unsigned int vec_stmts_size; /* Reference count in the SLP graph. */ unsigned int refcnt; @@ -444,7 +439,6 @@ public: #define SLP_TREE_SCALAR_OPS(S) (S)->ops #define SLP_TREE_REF_COUNT(S) (S)->refcnt #define SLP_TREE_VEC_DEFS(S) (S)->vec_defs -#define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation #define SLP_TREE_LANE_PERMUTATION(S) (S)->lane_permutation #define SLP_TREE_DEF_TYPE(S) (S)->def_type -- 2.47.3