/* Loop Vectorization
- Copyright (C) 2003-2018 Free Software Foundation, Inc.
+ Copyright (C) 2003-2019 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com> and
Ira Rosen <irar@il.ibm.com>
static opt_result
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
unsigned nbbs = loop->num_nodes;
poly_uint64 vectorization_factor = 1;
enclosing LOOP). */
static void
-vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
+vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop)
{
basic_block bb = loop->header;
tree init, step;
static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
static gcond *
-vect_get_loop_niters (struct loop *loop, tree *assumptions,
+vect_get_loop_niters (class loop *loop, tree *assumptions,
tree *number_of_iterations, tree *number_of_iterationsm1)
{
edge exit = single_exit (loop);
- struct tree_niter_desc niter_desc;
+ class tree_niter_desc niter_desc;
tree niter_assumptions, niter, may_be_zero;
gcond *cond = get_loop_exit_condition (loop);
if (!exit)
return cond;
- niter = chrec_dont_know;
may_be_zero = NULL_TREE;
- niter_assumptions = boolean_true_node;
if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
|| chrec_contains_undetermined (niter_desc.niter))
return cond;
static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
- const struct loop *const loop = (const struct loop *)data;
+ const class loop *const loop = (const class loop *)data;
if (flow_bb_inside_loop_p (loop, bb))
return true;
return false;
/* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as
stmt_vec_info structs for all the stmts in LOOP_IN. */
-_loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
+_loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
: vec_info (vec_info::loop, init_cost (loop_in), shared),
loop (loop_in),
bbs (XCNEWVEC (basic_block, loop->num_nodes)),
max_vectorization_factor (0),
mask_skip_niters (NULL_TREE),
mask_compare_type (NULL_TREE),
+ simd_if_cond (NULL_TREE),
unaligned_dr (NULL),
peeling_for_alignment (0),
ptr_mask (0),
ivexpr_map (NULL),
+ scan_map (NULL),
slp_unrolling_factor (1),
single_scalar_iteration_cost (0),
vectorizable (false),
operands_swapped (false),
no_data_dependencies (false),
has_mask_store (false),
+ scalar_loop_scaling (profile_probability::uninitialized ()),
scalar_loop (NULL),
orig_loop_info (NULL)
{
gimple *stmt = gsi_stmt (si);
gimple_set_uid (stmt, 0);
add_stmt (stmt);
+ /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
+ third argument is the #pragma omp simd if (x) condition, when 0,
+ loop shouldn't be vectorized, when non-zero constant, it should
+ be vectorized normally, otherwise versioned with vectorized loop
+ done if the condition is non-zero at runtime. */
+ if (loop_in->simduid
+ && is_gimple_call (stmt)
+ && gimple_call_internal_p (stmt)
+ && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
+ && gimple_call_num_args (stmt) >= 3
+ && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
+ && (loop_in->simduid
+ == SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
+ {
+ tree arg = gimple_call_arg (stmt, 2);
+ if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
+ simd_if_cond = arg;
+ else
+ gcc_assert (integer_nonzerop (arg));
+ }
}
}
}
release_vec_loop_masks (&masks);
delete ivexpr_map;
+ delete scan_map;
loop->aux = NULL;
}
static bool
vect_verify_full_masking (loop_vec_info loop_vinfo)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
unsigned int min_ni_width;
+ unsigned int max_nscalars_per_iter
+ = vect_get_max_nscalars_per_iter (loop_vinfo);
/* Use a normal loop if there are no statements that need masking.
This only happens in rare degenerate cases: it means that the loop
max_ni = wi::smin (max_ni, max_back_edges + 1);
/* Account for rgroup masks, in which each bit is replicated N times. */
- max_ni *= vect_get_max_nscalars_per_iter (loop_vinfo);
+ max_ni *= max_nscalars_per_iter;
/* Work out how many bits we need to represent the limit. */
min_ni_width = wi::min_precision (max_ni, UNSIGNED);
/* Find a scalar mode for which WHILE_ULT is supported. */
opt_scalar_int_mode cmp_mode_iter;
tree cmp_type = NULL_TREE;
+ tree iv_type = NULL_TREE;
+ widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
+ unsigned int iv_precision = UINT_MAX;
+
+ if (iv_limit != -1)
+ iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter,
+ UNSIGNED);
+
FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
{
unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ());
&& can_produce_all_loop_masks_p (loop_vinfo, this_type))
{
/* Although we could stop as soon as we find a valid mode,
- it's often better to continue until we hit Pmode, since the
- operands to the WHILE are more likely to be reusable in
- address calculations. */
- cmp_type = this_type;
+ there are at least two reasons why that's not always the
+ best choice:
+
+ - An IV that's Pmode or wider is more likely to be reusable
+ in address calculations than an IV that's narrower than
+ Pmode.
+
+ - Doing the comparison in IV_PRECISION or wider allows
+ a natural 0-based IV, whereas using a narrower comparison
+ type requires mitigations against wrap-around.
+
+ Conversely, if the IV limit is variable, doing the comparison
+ in a wider type than the original type can introduce
+ unnecessary extensions, so picking the widest valid mode
+ is not always a good choice either.
+
+ Here we prefer the first IV type that's Pmode or wider,
+ and the first comparison type that's IV_PRECISION or wider.
+ (The comparison type must be no wider than the IV type,
+ to avoid extensions in the vector loop.)
+
+ ??? We might want to try continuing beyond Pmode for ILP32
+ targets if CMP_BITS < IV_PRECISION. */
+ iv_type = this_type;
+ if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type))
+ cmp_type = this_type;
if (cmp_bits >= GET_MODE_BITSIZE (Pmode))
break;
}
return false;
LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type;
+ LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type;
return true;
}
static void
vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes, factor;
int innerloop_iters, i;
+ DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost");
+
/* Gather costs for statements in the scalar loop. */
/* FORNOW. */
continue;
/* Skip stmts that are not vectorized inside the loop. */
- if (stmt_info
- && !STMT_VINFO_RELEVANT_P (stmt_info)
- && (!STMT_VINFO_LIVE_P (stmt_info)
- || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !STMT_VINFO_IN_PATTERN_P (stmt_info))
+ stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info);
+ if (!STMT_VINFO_RELEVANT_P (vstmt_info)
+ && (!STMT_VINFO_LIVE_P (vstmt_info)
+ || !VECTORIZABLE_CYCLE_DEF
+ (STMT_VINFO_DEF_TYPE (vstmt_info))))
continue;
vect_cost_for_stmt kind;
niter could be analyzed under some assumptions. */
opt_result
-vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
+vect_analyze_loop_form_1 (class loop *loop, gcond **loop_cond,
tree *assumptions, tree *number_of_iterationsm1,
tree *number_of_iterations, gcond **inner_loop_cond)
{
}
else
{
- struct loop *innerloop = loop->inner;
+ class loop *innerloop = loop->inner;
edge entryedge;
/* Nested loop. We currently require that the loop is doubly-nested,
/* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */
opt_loop_vec_info
-vect_analyze_loop_form (struct loop *loop, vec_info_shared *shared)
+vect_analyze_loop_form (class loop *loop, vec_info_shared *shared)
{
tree assumptions, number_of_iterations, number_of_iterationsm1;
gcond *loop_cond, *inner_loop_cond = NULL;
static void
vect_update_vf_for_slp (loop_vec_info loop_vinfo)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
poly_uint64 vectorization_factor;
if (only_slp_in_loop)
{
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains only SLP stmts\n");
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Loop contains only SLP stmts\n");
vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
}
else
{
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains SLP and non-SLP stmts\n");
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Loop contains SLP and non-SLP stmts\n");
/* Both the vectorization factor and unroll factor have the form
current_vector_size * X for some rational X, so they must have
a common multiple. */
static opt_result
vect_analyze_loop_operations (loop_vec_info loop_vinfo)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
int i;
DUMP_VECT_SCOPE ("vect_analyze_loop_operations");
- stmt_vector_for_cost cost_vec;
- cost_vec.create (2);
+ auto_vec<stmt_info_for_cost> cost_vec;
for (i = 0; i < nbbs; i++)
{
} /* bbs */
add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec);
- cost_vec.release ();
/* All operations in the loop are either irrelevant (deal with loop
control, or dead), or only used outside the loop and can be moved
static int
vect_analyze_loop_costing (loop_vec_info loop_vinfo)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
/* Only fully-masked loops can have iteration counts less than the
return opt_result::success ();
}
+/* Look for SLP-only access groups and turn each individual access into its own
+ group. */
+static void
+vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
+{
+ unsigned int i;
+ struct data_reference *dr;
+
+ DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
+
+ vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs;
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
+ {
+ gcc_assert (DR_REF (dr));
+ stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr));
+
+ /* Check if the load is a part of an interleaving chain. */
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ {
+ stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ unsigned int group_size = DR_GROUP_SIZE (first_element);
+
+ /* Check if SLP-only groups. */
+ if (!STMT_SLP_TYPE (stmt_info)
+ && STMT_VINFO_SLP_VECT_ONLY (first_element))
+ {
+ /* Dissolve the group. */
+ STMT_VINFO_SLP_VECT_ONLY (first_element) = false;
+
+ stmt_vec_info vinfo = first_element;
+ while (vinfo)
+ {
+ stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo);
+ DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
+ DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
+ DR_GROUP_SIZE (vinfo) = 1;
+ DR_GROUP_GAP (vinfo) = group_size - 1;
+ vinfo = next;
+ }
+ }
+ }
+ }
+}
+
/* Function vect_analyze_loop_2.
Apply a set of analyses on LOOP, and create a loop_vec_info struct
/* The first group of checks is independent of the vector size. */
fatal = true;
+ if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)
+ && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo)))
+ return opt_result::failure_at (vect_location,
+ "not vectorized: simd if(0)\n");
+
/* Find all data references in the loop (which correspond to vdefs/vuses)
and analyze their evolution in the loop. */
/* Analyze the data references and also adjust the minimal
vectorization factor according to the loads and stores. */
- ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
+ ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal);
if (!ok)
{
if (dump_enabled_p ())
/* Data-flow analysis to detect stmts that do not need to be vectorized. */
- ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
+ ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal);
if (!ok)
{
if (dump_enabled_p ())
}
}
+ /* Dissolve SLP-only groups. */
+ vect_dissolve_slp_only_groups (loop_vinfo);
+
/* Scan all the remaining operations in the loop that are not subject
to SLP and make sure they are vectorizable. */
ok = vect_analyze_loop_operations (loop_vinfo);
loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must
be vectorized. */
opt_loop_vec_info
-vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
+vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo,
vec_info_shared *shared)
{
auto_vector_sizes vector_sizes;
/* Autodetect first vector size we try. */
current_vector_size = 0;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes,
+ loop->simdlen != 0);
unsigned int next_size = 0;
DUMP_VECT_SCOPE ("analyze_loop_nest");
unsigned n_stmts = 0;
poly_uint64 autodetected_vector_size = 0;
+ opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
+ poly_uint64 first_vector_size = 0;
while (1)
{
/* Check the CFG characteristics of the loop (nesting, entry/exit). */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"bad loop form.\n");
+ gcc_checking_assert (first_loop_vinfo == NULL);
return loop_vinfo;
}
{
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
- return loop_vinfo;
+ if (loop->simdlen
+ && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ (unsigned HOST_WIDE_INT) loop->simdlen))
+ {
+ if (first_loop_vinfo == NULL)
+ {
+ first_loop_vinfo = loop_vinfo;
+ first_vector_size = current_vector_size;
+ loop->aux = NULL;
+ }
+ else
+ delete loop_vinfo;
+ }
+ else
+ {
+ delete first_loop_vinfo;
+ return loop_vinfo;
+ }
}
-
- delete loop_vinfo;
+ else
+ delete loop_vinfo;
if (next_size == 0)
autodetected_vector_size = current_vector_size;
&& known_eq (vector_sizes[next_size], autodetected_vector_size))
next_size += 1;
- if (fatal
- || next_size == vector_sizes.length ()
+ if (fatal)
+ {
+ gcc_checking_assert (first_loop_vinfo == NULL);
+ return opt_loop_vec_info::propagate_failure (res);
+ }
+
+ if (next_size == vector_sizes.length ()
|| known_eq (current_vector_size, 0U))
- return opt_loop_vec_info::propagate_failure (res);
+ {
+ if (first_loop_vinfo)
+ {
+ current_vector_size = first_vector_size;
+ loop->aux = (loop_vec_info) first_loop_vinfo;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Choosing vector size ");
+ dump_dec (MSG_NOTE, current_vector_size);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ return first_loop_vinfo;
+ }
+ else
+ return opt_loop_vec_info::propagate_failure (res);
+ }
/* Try the next biggest vector size. */
current_vector_size = vector_sizes[next_size++];
stmt_vec_info stmt_vinfo = stmts[0];
tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
tree scalar_type = TREE_TYPE (vector_type);
- struct loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
+ class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
gcc_assert (loop);
switch (code)
vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
gimple *first_stmt)
{
- struct loop *loop = (gimple_bb (phi))->loop_father;
- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
+ class loop *loop = (gimple_bb (phi))->loop_father;
+ class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
enum tree_code code;
gimple *loop_use_stmt = NULL;
- stmt_vec_info use_stmt_info, current_stmt_info = NULL;
+ stmt_vec_info use_stmt_info;
tree lhs;
imm_use_iterator imm_iter;
use_operand_p use_p;
if (loop != vect_loop)
return false;
+ auto_vec<stmt_vec_info, 8> reduc_chain;
lhs = PHI_RESULT (phi);
code = gimple_assign_rhs_code (first_stmt);
while (1)
/* Insert USE_STMT into reduction chain. */
use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
- if (current_stmt_info)
- {
- REDUC_GROUP_NEXT_ELEMENT (current_stmt_info) = use_stmt_info;
- REDUC_GROUP_FIRST_ELEMENT (use_stmt_info)
- = REDUC_GROUP_FIRST_ELEMENT (current_stmt_info);
- }
- else
- REDUC_GROUP_FIRST_ELEMENT (use_stmt_info) = use_stmt_info;
+ reduc_chain.safe_push (use_stmt_info);
lhs = gimple_assign_lhs (loop_use_stmt);
- current_stmt_info = use_stmt_info;
size++;
}
/* Swap the operands, if needed, to make the reduction operand be the second
operand. */
lhs = PHI_RESULT (phi);
- stmt_vec_info next_stmt_info = REDUC_GROUP_FIRST_ELEMENT (current_stmt_info);
- while (next_stmt_info)
+ for (unsigned i = 0; i < reduc_chain.length (); ++i)
{
- gassign *next_stmt = as_a <gassign *> (next_stmt_info->stmt);
+ gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
if (gimple_assign_rhs2 (next_stmt) == lhs)
{
tree op = gimple_assign_rhs1 (next_stmt);
&& vect_valid_reduction_input_p (def_stmt_info))
{
lhs = gimple_assign_lhs (next_stmt);
- next_stmt_info = REDUC_GROUP_NEXT_ELEMENT (next_stmt_info);
continue;
}
}
lhs = gimple_assign_lhs (next_stmt);
- next_stmt_info = REDUC_GROUP_NEXT_ELEMENT (next_stmt_info);
}
+ /* Build up the actual chain. */
+ for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
+ {
+ REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
+ REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
+ }
+ REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
+ REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
+
/* Save the chain for further analysis in SLP detection. */
- stmt_vec_info first_stmt_info
- = REDUC_GROUP_FIRST_ELEMENT (current_stmt_info);
- LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first_stmt_info);
- REDUC_GROUP_SIZE (first_stmt_info) = size;
+ LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
+ REDUC_GROUP_SIZE (reduc_chain[0]) = size;
return true;
}
enum vect_reduction_type *v_reduc_type)
{
gphi *phi = as_a <gphi *> (phi_info->stmt);
- struct loop *loop = (gimple_bb (phi))->loop_father;
- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
+ class loop *loop = (gimple_bb (phi))->loop_father;
+ class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
+ bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
gimple *phi_use_stmt = NULL;
enum tree_code orig_code, code;
tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
tree type;
- int nloop_uses;
tree name;
imm_use_iterator imm_iter;
use_operand_p use_p;
can be constant. See PR60382. */
if (has_zero_uses (phi_name))
return NULL;
- nloop_uses = 0;
+ unsigned nphi_def_loop_uses = 0;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
{
gimple *use_stmt = USE_STMT (use_p);
return NULL;
}
- nloop_uses++;
- if (nloop_uses > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction value used in loop.\n");
- return NULL;
- }
-
+ nphi_def_loop_uses++;
phi_use_stmt = use_stmt;
}
return NULL;
}
- nloop_uses = 0;
+ unsigned nlatch_def_loop_uses = 0;
auto_vec<gphi *, 3> lcphis;
+ bool inner_loop_of_double_reduc = false;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
{
gimple *use_stmt = USE_STMT (use_p);
if (is_gimple_debug (use_stmt))
continue;
if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
- nloop_uses++;
+ nlatch_def_loop_uses++;
else
- /* We can have more than one loop-closed PHI. */
- lcphis.safe_push (as_a <gphi *> (use_stmt));
- if (nloop_uses > 1)
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction used in loop.\n");
- return NULL;
+ /* We can have more than one loop-closed PHI. */
+ lcphis.safe_push (as_a <gphi *> (use_stmt));
+ if (nested_in_vect_loop
+ && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
+ == vect_double_reduction_def))
+ inner_loop_of_double_reduc = true;
}
}
+ /* If this isn't a nested cycle or if the nested cycle reduction value
+ is used ouside of the inner loop we cannot handle uses of the reduction
+ value. */
+ if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
+ && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "reduction used in loop.\n");
+ return NULL;
+ }
+
/* If DEF_STMT is a phi node itself, we expect it to have a single argument
defined in the inner loop. */
if (phi_def)
}
gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
- bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
code = orig_code = gimple_assign_rhs_code (def_stmt);
+ if (nested_in_vect_loop && !check_reduction)
+ {
+ /* FIXME: Even for non-reductions code generation is funneled
+ through vectorizable_reduction for the stmt defining the
+ PHI latch value. So we have to artificially restrict ourselves
+ for the supported operations. */
+ switch (get_gimple_rhs_class (code))
+ {
+ case GIMPLE_BINARY_RHS:
+ case GIMPLE_TERNARY_RHS:
+ break;
+ default:
+ /* Not supported by vectorizable_reduction. */
+ if (dump_enabled_p ())
+ report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+ "nested cycle: not handled operation: ");
+ return NULL;
+ }
+ if (dump_enabled_p ())
+ report_vect_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
+ return def_stmt_info;
+ }
+
/* We can handle "res -= x[i]", which is non-associative by
simply rewriting this into "res += -x[i]". Avoid changing
gimple instruction for the first simple tests and only do this
return def_stmt_info;
}
- /* Dissolve group eventually half-built by vect_is_slp_reduction. */
- stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (def_stmt_info);
- while (first)
- {
- stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
- REDUC_GROUP_FIRST_ELEMENT (first) = NULL;
- REDUC_GROUP_NEXT_ELEMENT (first) = NULL;
- first = next;
- }
-
/* Look for the expression computing loop_arg from loop PHI result. */
if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
return def_stmt_info;
iterations are unknown, count a taken branch per peeled loop. */
retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
NULL, 0, vect_prologue);
- retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
- NULL, 0, vect_epilogue);
+ retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
+ NULL, 0, vect_epilogue);
}
else
{
/* Cost model disabled. */
if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
{
- dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
*ret_min_profitable_niters = 0;
*ret_min_profitable_estimate = 0;
return;
unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
(void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
vect_prologue);
- dump_printf (MSG_NOTE,
- "cost model: Adding cost of checks for loop "
- "versioning to treat misalignment.\n");
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE,
+ "cost model: Adding cost of checks for loop "
+ "versioning to treat misalignment.\n");
}
/* Requires loop versioning with alias checks. */
(void) add_stmt_cost (target_cost_data, nstmts, scalar_stmt,
NULL, 0, vect_prologue);
}
- dump_printf (MSG_NOTE,
- "cost model: Adding cost of checks for loop "
- "versioning aliasing.\n");
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE,
+ "cost model: Adding cost of checks for loop "
+ "versioning aliasing.\n");
}
/* Requires loop versioning with niter checks. */
/* FIXME: Make cost depend on complexity of individual check. */
(void) add_stmt_cost (target_cost_data, 1, vector_stmt, NULL, 0,
vect_prologue);
- dump_printf (MSG_NOTE,
- "cost model: Adding cost of checks for loop "
- "versioning niters.\n");
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE,
+ "cost model: Adding cost of checks for loop "
+ "versioning niters.\n");
}
if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
else if (npeel < 0)
{
peel_iters_prologue = assumed_vf / 2;
- dump_printf (MSG_NOTE, "cost model: "
- "prologue peel iters set to vf/2.\n");
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "cost model: "
+ "prologue peel iters set to vf/2.\n");
/* If peeling for alignment is unknown, loop bound of main loop becomes
unknown. */
peel_iters_epilogue = assumed_vf / 2;
- dump_printf (MSG_NOTE, "cost model: "
- "epilogue peel iters set to vf/2 because "
- "peeling for alignment is unknown.\n");
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "cost model: "
+ "epilogue peel iters set to vf/2 because "
+ "peeling for alignment is unknown.\n");
/* If peeled iterations are unknown, count a taken branch and a not taken
branch per peeled loop. Even if scalar loop iterations are known,
/* Calculate number of iterations required to make the vector version
profitable, relative to the loop bodies only. The following condition
must hold true:
- SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
+ SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC
where
SIC = scalar iteration cost, VIC = vector iteration cost,
VOC = vector outside cost, VF = vectorization factor,
- PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
+ NPEEL = prologue iterations + epilogue iterations,
SOC = scalar outside cost for run time cost model check. */
- if ((scalar_single_iter_cost * assumed_vf) > (int) vec_inside_cost)
+ int saving_per_viter = (scalar_single_iter_cost * assumed_vf
+ - vec_inside_cost);
+ if (saving_per_viter <= 0)
+ {
+ if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
+ warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd,
+ "vectorization did not happen for a simd loop");
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cost model: the vector iteration cost = %d "
+ "divided by the scalar iteration cost = %d "
+ "is greater or equal to the vectorization factor = %d"
+ ".\n",
+ vec_inside_cost, scalar_single_iter_cost, assumed_vf);
+ *ret_min_profitable_niters = -1;
+ *ret_min_profitable_estimate = -1;
+ return;
+ }
+
+ /* ??? The "if" arm is written to handle all cases; see below for what
+ we would do for !LOOP_VINFO_FULLY_MASKED_P. */
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ /* Rewriting the condition above in terms of the number of
+ vector iterations (vniters) rather than the number of
+ scalar iterations (niters) gives:
+
+ SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC
+
+ <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC
+
+ For integer N, X and Y when X > 0:
+
+ N * X > Y <==> N >= (Y /[floor] X) + 1. */
+ int outside_overhead = (vec_outside_cost
+ - scalar_single_iter_cost * peel_iters_prologue
+ - scalar_single_iter_cost * peel_iters_epilogue
+ - scalar_outside_cost);
+ /* We're only interested in cases that require at least one
+ vector iteration. */
+ int min_vec_niters = 1;
+ if (outside_overhead > 0)
+ min_vec_niters = outside_overhead / saving_per_viter + 1;
+
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n",
+ min_vec_niters);
+
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ /* Now that we know the minimum number of vector iterations,
+ find the minimum niters for which the scalar cost is larger:
+
+ SIC * niters > VIC * vniters + VOC - SOC
+
+ We know that the minimum niters is no more than
+ vniters * VF + NPEEL, but it might be (and often is) less
+ than that if a partial vector iteration is cheaper than the
+ equivalent scalar code. */
+ int threshold = (vec_inside_cost * min_vec_niters
+ + vec_outside_cost
+ - scalar_outside_cost);
+ if (threshold <= 0)
+ min_profitable_iters = 1;
+ else
+ min_profitable_iters = threshold / scalar_single_iter_cost + 1;
+ }
+ else
+ /* Convert the number of vector iterations into a number of
+ scalar iterations. */
+ min_profitable_iters = (min_vec_niters * assumed_vf
+ + peel_iters_prologue
+ + peel_iters_epilogue);
+ }
+ else
{
min_profitable_iters = ((vec_outside_cost - scalar_outside_cost)
* assumed_vf
min_profitable_iters = 0;
else
{
- min_profitable_iters /= ((scalar_single_iter_cost * assumed_vf)
- - vec_inside_cost);
+ min_profitable_iters /= saving_per_viter;
if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters)
<= (((int) vec_inside_cost * min_profitable_iters)
min_profitable_iters++;
}
}
- /* vector version will never be profitable. */
- else
- {
- if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
- warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd,
- "vectorization did not happen for a simd loop");
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "cost model: the vector iteration cost = %d "
- "divided by the scalar iteration cost = %d "
- "is greater or equal to the vectorization factor = %d"
- ".\n",
- vec_inside_cost, scalar_single_iter_cost, assumed_vf);
- *ret_min_profitable_niters = -1;
- *ret_min_profitable_estimate = -1;
- return;
- }
-
- dump_printf (MSG_NOTE,
- " Calculated minimum iters for profitability: %d\n",
- min_profitable_iters);
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE,
+ " Calculated minimum iters for profitability: %d\n",
+ min_profitable_iters);
if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
&& min_profitable_iters < (assumed_vf + peel_iters_prologue))
Non-vectorized variant is SIC * niters and it must win over vector
variant on the expected loop trip count. The following condition must hold true:
- SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */
+ SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC */
if (vec_outside_cost <= 0)
min_profitable_estimate = 0;
+ else if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ /* This is a repeat of the code above, but with + SOC rather
+ than - SOC. */
+ int outside_overhead = (vec_outside_cost
+ - scalar_single_iter_cost * peel_iters_prologue
+ - scalar_single_iter_cost * peel_iters_epilogue
+ + scalar_outside_cost);
+ int min_vec_niters = 1;
+ if (outside_overhead > 0)
+ min_vec_niters = outside_overhead / saving_per_viter + 1;
+
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ int threshold = (vec_inside_cost * min_vec_niters
+ + vec_outside_cost
+ + scalar_outside_cost);
+ min_profitable_estimate = threshold / scalar_single_iter_cost + 1;
+ }
+ else
+ min_profitable_estimate = (min_vec_niters * assumed_vf
+ + peel_iters_prologue
+ + peel_iters_epilogue);
+ }
else
{
min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost)
tree vectype;
machine_mode mode;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = NULL;
+ class loop *loop = NULL;
if (loop_vinfo)
loop = LOOP_VINFO_LOOP (loop_vinfo);
tree *adjustment_def)
{
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree scalar_type = TREE_TYPE (init_val);
tree vectype = get_vectype_for_scalar_type (scalar_type);
enum tree_code code = gimple_assign_rhs_code (stmt_vinfo->stmt);
unsigned HOST_WIDE_INT nunits;
unsigned j, number_of_places_left_in_vector;
tree vector_type;
- tree vop;
- int group_size = stmts.length ();
- unsigned int vec_num, i;
- unsigned number_of_copies = 1;
- vec<tree> voprnds;
- voprnds.create (number_of_vectors);
- struct loop *loop;
- auto_vec<tree, 16> permute_results;
+ unsigned int group_size = stmts.length ();
+ unsigned int i;
+ class loop *loop;
vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
nunits = group_size;
- number_of_copies = nunits * number_of_vectors / group_size;
-
number_of_places_left_in_vector = nunits;
bool constant_p = true;
tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits);
- for (j = 0; j < number_of_copies; j++)
+ gimple_seq ctor_seq = NULL;
+ for (j = 0; j < nunits * number_of_vectors; ++j)
{
- for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
- {
- tree op;
- /* Get the def before the loop. In reduction chain we have only
- one initial value. */
- if ((j != (number_of_copies - 1)
- || (reduc_chain && i != 0))
- && neutral_op)
- op = neutral_op;
- else
- op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
+ tree op;
+ i = j % group_size;
+ stmt_vinfo = stmts[i];
+
+ /* Get the def before the loop. In reduction chain we have only
+ one initial value. Else we have as many as PHIs in the group. */
+ if (reduc_chain)
+ op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
+ else if (((vec_oprnds->length () + 1) * nunits
+ - number_of_places_left_in_vector >= group_size)
+ && neutral_op)
+ op = neutral_op;
+ else
+ op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
- /* Create 'vect_ = {op0,op1,...,opn}'. */
- number_of_places_left_in_vector--;
- elts[number_of_places_left_in_vector] = op;
- if (!CONSTANT_CLASS_P (op))
- constant_p = false;
+ /* Create 'vect_ = {op0,op1,...,opn}'. */
+ number_of_places_left_in_vector--;
+ elts[nunits - number_of_places_left_in_vector - 1] = op;
+ if (!CONSTANT_CLASS_P (op))
+ constant_p = false;
- if (number_of_places_left_in_vector == 0)
- {
- gimple_seq ctor_seq = NULL;
- tree init;
- if (constant_p && !neutral_op
- ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
- : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
- /* Build the vector directly from ELTS. */
- init = gimple_build_vector (&ctor_seq, &elts);
- else if (neutral_op)
- {
- /* Build a vector of the neutral value and shift the
- other elements into place. */
- init = gimple_build_vector_from_val (&ctor_seq, vector_type,
- neutral_op);
- int k = nunits;
- while (k > 0 && elts[k - 1] == neutral_op)
- k -= 1;
- while (k > 0)
- {
- k -= 1;
- init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
- vector_type, init, elts[k]);
- }
- }
- else
+ if (number_of_places_left_in_vector == 0)
+ {
+ tree init;
+ if (constant_p && !neutral_op
+ ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
+ : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
+ /* Build the vector directly from ELTS. */
+ init = gimple_build_vector (&ctor_seq, &elts);
+ else if (neutral_op)
+ {
+ /* Build a vector of the neutral value and shift the
+ other elements into place. */
+ init = gimple_build_vector_from_val (&ctor_seq, vector_type,
+ neutral_op);
+ int k = nunits;
+ while (k > 0 && elts[k - 1] == neutral_op)
+ k -= 1;
+ while (k > 0)
{
- /* First time round, duplicate ELTS to fill the
- required number of vectors, then cherry pick the
- appropriate result for each iteration. */
- if (vec_oprnds->is_empty ())
- duplicate_and_interleave (&ctor_seq, vector_type, elts,
- number_of_vectors,
- permute_results);
- init = permute_results[number_of_vectors - j - 1];
+ k -= 1;
+ init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
+ vector_type, init, elts[k]);
}
- if (ctor_seq != NULL)
- gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
- voprnds.quick_push (init);
-
- number_of_places_left_in_vector = nunits;
- elts.new_vector (vector_type, nunits, 1);
- elts.quick_grow (nunits);
- constant_p = true;
- }
- }
- }
-
- /* Since the vectors are created in the reverse order, we should invert
- them. */
- vec_num = voprnds.length ();
- for (j = vec_num; j != 0; j--)
- {
- vop = voprnds[j - 1];
- vec_oprnds->quick_push (vop);
- }
-
- voprnds.release ();
-
- /* In case that VF is greater than the unrolling factor needed for the SLP
- group of stmts, NUMBER_OF_VECTORS to be created is greater than
- NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
- to replicate the vectors. */
- tree neutral_vec = NULL;
- while (number_of_vectors > vec_oprnds->length ())
- {
- if (neutral_op)
- {
- if (!neutral_vec)
+ }
+ else
{
- gimple_seq ctor_seq = NULL;
- neutral_vec = gimple_build_vector_from_val
- (&ctor_seq, vector_type, neutral_op);
- if (ctor_seq != NULL)
- gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
+ /* First time round, duplicate ELTS to fill the
+ required number of vectors. */
+ duplicate_and_interleave (&ctor_seq, vector_type, elts,
+ number_of_vectors, *vec_oprnds);
+ break;
}
- vec_oprnds->quick_push (neutral_vec);
- }
- else
- {
- for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
- vec_oprnds->quick_push (vop);
- }
+ vec_oprnds->quick_push (init);
+
+ number_of_places_left_in_vector = nunits;
+ elts.new_vector (vector_type, nunits, 1);
+ elts.quick_grow (nunits);
+ constant_p = true;
+ }
}
+ if (ctor_seq != NULL)
+ gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
}
tree vectype;
machine_mode mode;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
basic_block exit_bb;
tree scalar_dest;
tree scalar_type;
if (off != 0)
{
tree new_idx_val = idx_val;
- tree new_val = val;
if (off != v_size - el_size)
{
new_idx_val = make_ssa_name (idx_eltype);
old_idx_val);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
}
- new_val = make_ssa_name (data_eltype);
+ tree new_val = make_ssa_name (data_eltype);
epilog_stmt = gimple_build_assign (new_val,
COND_EXPR,
build2 (GT_EXPR,
in a vector mode of smaller size and first reduce upper/lower
halves against each other. */
enum machine_mode mode1 = mode;
- tree vectype1 = vectype;
unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
unsigned sz1 = sz;
if (!slp_reduc
&& (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
sz1 = GET_MODE_SIZE (mode1).to_constant ();
- vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
+ tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
reduce_with_shift = have_whole_vector_shift (mode1);
if (!VECTOR_MODE_P (mode1))
reduce_with_shift = false;
dump_printf_loc (MSG_NOTE, vect_location,
"Reduce using vector shifts\n");
- mode1 = TYPE_MODE (vectype1);
vec_dest = vect_create_destination_var (scalar_dest, vectype1);
for (elt_offset = nelements / 2;
elt_offset >= 1;
= loop_vinfo->lookup_stmt (exit_phi);
gphi *vect_phi;
- /* FORNOW. Currently not supporting the case that an inner-loop
- reduction is not used in the outer-loop (but only outside the
- outer-loop), unless it is double reduction. */
- gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
- && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
- || double_reduc);
-
if (double_reduc)
STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
else
return lhs;
}
+/* Get a masked internal function equivalent to REDUC_FN. VECTYPE_IN is the
+ type of the vector input. */
+
+static internal_fn
+get_masked_reduction_fn (internal_fn reduc_fn, tree vectype_in)
+{
+ internal_fn mask_reduc_fn;
+
+ switch (reduc_fn)
+ {
+ case IFN_FOLD_LEFT_PLUS:
+ mask_reduc_fn = IFN_MASK_FOLD_LEFT_PLUS;
+ break;
+
+ default:
+ return IFN_LAST;
+ }
+
+ if (direct_internal_fn_supported_p (mask_reduc_fn, vectype_in,
+ OPTIMIZE_FOR_SPEED))
+ return mask_reduc_fn;
+ return IFN_LAST;
+}
+
/* Perform an in-order reduction (FOLD_LEFT_REDUCTION). STMT_INFO is the
statement that sets the live-out value. REDUC_DEF_STMT is the phi
statement. CODE is the operation performed by STMT_INFO and OPS are
int reduc_index, vec_loop_masks *masks)
{
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
stmt_vec_info new_stmt_info = NULL;
+ internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in);
int ncopies;
if (slp_node)
auto_vec<tree> vec_oprnds0;
if (slp_node)
{
- vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
- slp_node);
+ auto_vec<vec<tree> > vec_defs (2);
+ auto_vec<tree> sops(2);
+ sops.quick_push (ops[0]);
+ sops.quick_push (ops[1]);
+ vect_get_slp_defs (sops, slp_node, &vec_defs);
+ vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]);
+ vec_defs[0].release ();
+ vec_defs[1].release ();
group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
}
def0 = negated;
}
- if (mask)
+ if (mask && mask_reduc_fn == IFN_LAST)
def0 = merge_with_identity (gsi, mask, vectype_out, def0,
vector_identity);
/* On the first iteration the input is simply the scalar phi
result, and for subsequent iterations it is the output of
the preceding operation. */
- if (reduc_fn != IFN_LAST)
+ if (reduc_fn != IFN_LAST || (mask && mask_reduc_fn != IFN_LAST))
{
- new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, def0);
+ if (mask && mask_reduc_fn != IFN_LAST)
+ new_stmt = gimple_build_call_internal (mask_reduc_fn, 3, reduc_var,
+ def0, mask);
+ else
+ new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var,
+ def0);
/* For chained SLP reductions the output of the previous reduction
operation serves as the input of the next. For the final statement
the output cannot be a temporary - we reuse the original
/* Remove the statement, so that we can use the same code paths
as for statements that we've just created. */
gimple_stmt_iterator tmp_gsi = gsi_for_stmt (new_stmt);
- gsi_remove (&tmp_gsi, false);
+ gsi_remove (&tmp_gsi, true);
}
if (i == vec_num - 1)
does not cause overflow. */
static bool
-is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, struct loop *loop)
+is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, class loop *loop)
{
gphi *phi = as_a <gphi *> (stmt_vinfo->stmt);
tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
<= TYPE_PRECISION (lhs_type));
}
+/* Check if masking can be supported by inserting a conditional expression.
+ CODE is the code for the operation. COND_FN is the conditional internal
+ function, if it exists. VECTYPE_IN is the type of the vector input. */
+static bool
+use_mask_by_cond_expr_p (enum tree_code code, internal_fn cond_fn,
+ tree vectype_in)
+{
+ if (cond_fn != IFN_LAST
+ && direct_internal_fn_supported_p (cond_fn, vectype_in,
+ OPTIMIZE_FOR_SPEED))
+ return false;
+
+ switch (code)
+ {
+ case DOT_PROD_EXPR:
+ case SAD_EXPR:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Insert a conditional expression to enable masked vectorization. CODE is the
+ code for the operation. VOP is the array of operands. MASK is the loop
+ mask. GSI is a statement iterator used to place the new conditional
+ expression. */
+static void
+build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask,
+ gimple_stmt_iterator *gsi)
+{
+ switch (code)
+ {
+ case DOT_PROD_EXPR:
+ {
+ tree vectype = TREE_TYPE (vop[1]);
+ tree zero = build_zero_cst (vectype);
+ tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
+ gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR,
+ mask, vop[1], zero);
+ gsi_insert_before (gsi, select, GSI_SAME_STMT);
+ vop[1] = masked_op1;
+ break;
+ }
+
+ case SAD_EXPR:
+ {
+ tree vectype = TREE_TYPE (vop[1]);
+ tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
+ gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR,
+ mask, vop[1], vop[0]);
+ gsi_insert_before (gsi, select, GSI_SAME_STMT);
+ vop[1] = masked_op1;
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Function vectorizable_reduction.
Check if STMT_INFO performs a reduction operation that can be vectorized.
tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
tree vectype_in = NULL_TREE;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, orig_code;
internal_fn reduc_fn;
machine_mode vec_mode;
bool nested_cycle = false, found_nested_cycle_def = false;
bool double_reduc = false;
basic_block def_bb;
- struct loop * def_stmt_loop;
+ class loop * def_stmt_loop;
tree def_arg;
auto_vec<tree> vec_oprnds0;
auto_vec<tree> vec_oprnds1;
return true;
gassign *reduc_stmt = as_a <gassign *> (reduc_stmt_info->stmt);
+ code = gimple_assign_rhs_code (reduc_stmt);
for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
{
tree op = gimple_op (reduc_stmt, k);
if (op == phi_result)
continue;
- if (k == 1
- && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR)
+ if (k == 1 && code == COND_EXPR)
+ continue;
+ bool is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt);
+ gcc_assert (is_simple_use);
+ if (dt == vect_constant_def || dt == vect_external_def)
continue;
if (!vectype_in
|| (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
break;
}
+ /* For a nested cycle we might end up with an operation like
+ phi_result * phi_result. */
+ if (!vectype_in)
+ vectype_in = STMT_VINFO_VECTYPE (stmt_info);
gcc_assert (vectype_in);
if (slp_node)
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
- stmt_vec_info reduc_def_info = NULL;
+ stmt_vec_info reduc_def_info;
+ if (orig_stmt_info)
+ reduc_def_info = STMT_VINFO_REDUC_DEF (orig_stmt_info);
+ else
+ reduc_def_info = STMT_VINFO_REDUC_DEF (stmt_info);
+ gcc_assert (reduc_def_info);
+ gphi *reduc_def_phi = as_a <gphi *> (reduc_def_info->stmt);
+ tree reduc_def = PHI_RESULT (reduc_def_phi);
int reduc_index = -1;
for (i = 0; i < op_type; i++)
{
&def_stmt_info);
dt = dts[i];
gcc_assert (is_simple_use);
- if (dt == vect_reduction_def)
+ if (dt == vect_reduction_def
+ && ops[i] == reduc_def)
{
- reduc_def_info = def_stmt_info;
reduc_index = i;
continue;
}
&& !(dt == vect_nested_cycle && nested_cycle))
return false;
- if (dt == vect_nested_cycle)
+ if (dt == vect_nested_cycle
+ && ops[i] == reduc_def)
{
found_nested_cycle_def = true;
- reduc_def_info = def_stmt_info;
reduc_index = i;
}
"in-order reduction chain without SLP.\n");
return false;
}
-
- if (orig_stmt_info)
- reduc_def_info = STMT_VINFO_REDUC_DEF (orig_stmt_info);
- else
- reduc_def_info = STMT_VINFO_REDUC_DEF (stmt_info);
}
- if (! reduc_def_info)
- return false;
-
- gphi *reduc_def_phi = dyn_cast <gphi *> (reduc_def_info->stmt);
- if (!reduc_def_phi)
- return false;
-
if (!(reduc_index == -1
|| dts[reduc_index] == vect_reduction_def
|| dts[reduc_index] == vect_nested_cycle
vec_mode = TYPE_MODE (vectype_in);
poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
+ if (nested_cycle)
+ {
+ def_bb = gimple_bb (reduc_def_phi);
+ def_stmt_loop = def_bb->loop_father;
+ def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
+ loop_preheader_edge (def_stmt_loop));
+ stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
+ if (def_arg_stmt_info
+ && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
+ == vect_double_reduction_def))
+ double_reduc = true;
+ }
+
+ vect_reduction_type reduction_type
+ = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
+ if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
+ && ncopies > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "multiple types in double reduction or condition "
+ "reduction.\n");
+ return false;
+ }
+
if (code == COND_EXPR)
{
/* Only call during the analysis stage, otherwise we'll lose
STMT_VINFO_TYPE. */
if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL,
- ops[reduc_index], 0, NULL,
- cost_vec))
+ true, NULL, cost_vec))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
return false;
}
}
- else
+ else if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
+ || code == LROTATE_EXPR || code == RROTATE_EXPR)
{
- /* 4. Supportable by target? */
-
- if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
- || code == LROTATE_EXPR || code == RROTATE_EXPR)
+ /* Only call during the analysis stage, otherwise we'll lose
+ STMT_VINFO_TYPE. We only support this for nested cycles
+ without double reductions at the moment. */
+ if (!nested_cycle
+ || double_reduc
+ || (!vec_stmt && !vectorizable_shift (stmt_info, gsi, NULL,
+ NULL, cost_vec)))
{
- /* Shifts and rotates are only supported by vectorizable_shifts,
- not vectorizable_reduction. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported shift or rotation.\n");
+ "unsupported shift or rotation in reduction\n");
return false;
}
+ }
+ else
+ {
+ /* 4. Supportable by target? */
/* 4.1. check support for the operation in the loop */
optab = optab_for_tree_code (code, vectype_in, optab_default);
(and also the same tree-code) when generating the epilog code and
when generating the code inside the loop. */
- vect_reduction_type reduction_type
- = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
if (orig_stmt_info
&& (reduction_type == TREE_CODE_REDUCTION
|| reduction_type == FOLD_LEFT_REDUCTION))
orig_code = cond_reduc_op_code;
}
- if (nested_cycle)
- {
- def_bb = gimple_bb (reduc_def_phi);
- def_stmt_loop = def_bb->loop_father;
- def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
- loop_preheader_edge (def_stmt_loop));
- stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
- if (def_arg_stmt_info
- && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
- == vect_double_reduction_def))
- double_reduc = true;
- }
-
reduc_fn = IFN_LAST;
if (reduction_type == TREE_CODE_REDUCTION
return false;
}
- if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
- && ncopies > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "multiple types in double reduction or condition "
- "reduction.\n");
- return false;
- }
-
/* For SLP reductions, see if there is a neutral value we can use. */
tree neutral_op = NULL_TREE;
if (slp_node)
internal_fn cond_fn = get_conditional_internal_fn (code);
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
if (!vec_stmt) /* transformation not required. */
{
if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
{
if (reduction_type != FOLD_LEFT_REDUCTION
+ && !mask_by_cond_expr
&& (cond_fn == IFN_LAST
|| !direct_internal_fn_supported_p (cond_fn, vectype_in,
OPTIMIZE_FOR_SPEED)))
{
gcc_assert (!slp_node);
return vectorizable_condition (stmt_info, gsi, vec_stmt,
- NULL, reduc_index, NULL, NULL);
+ true, NULL, NULL);
}
/* Create the destination vector */
{
gcc_assert (!slp_node);
vectorizable_condition (stmt_info, gsi, vec_stmt,
- PHI_RESULT (phis[0]->stmt),
- reduc_index, NULL, NULL);
- /* Multiple types are not supported for condition. */
+ true, NULL, NULL);
break;
}
+ if (code == LSHIFT_EXPR
+ || code == RSHIFT_EXPR)
+ {
+ vectorizable_shift (stmt_info, gsi, vec_stmt, slp_node, NULL);
+ break;
+ }
/* Handle uses. */
if (j == 0)
FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
{
tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
- if (masked_loop_p)
+ if (masked_loop_p && !mask_by_cond_expr)
{
/* Make sure that the reduction accumulator is vop[0]. */
if (reduc_index == 1)
if (op_type == ternary_op)
vop[2] = vec_oprnds2[i];
+ if (masked_loop_p && mask_by_cond_expr)
+ {
+ tree mask = vect_get_loop_mask (gsi, masks,
+ vec_num * ncopies,
+ vectype_in, i * ncopies + j);
+ build_vect_cond_expr (code, vop, mask, gsi);
+ }
+
gassign *new_stmt = gimple_build_assign (vec_dest, code,
vop[0], vop[1], vop[2]);
new_temp = make_ssa_name (vec_dest, new_stmt);
stmt_vector_for_cost *cost_vec)
{
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
unsigned ncopies;
bool nested_in_vect_loop = false;
- struct loop *iv_loop;
+ class loop *iv_loop;
tree vec_def;
edge pe = loop_preheader_edge (loop);
basic_block new_bb;
stmt_vector_for_cost *)
{
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
imm_use_iterator imm_iter;
tree lhs, lhs_type, bitsize, vec_bitsize;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Kill any debug uses outside LOOP of SSA names defined in STMT_INFO. */
static void
-vect_loop_kill_debug_uses (struct loop *loop, stmt_vec_info stmt_info)
+vect_loop_kill_debug_uses (class loop *loop, stmt_vec_info stmt_info)
{
ssa_op_iter op_iter;
imm_use_iterator imm_iter;
}
widest_int max;
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Check the upper bound of loop niters. */
if (get_max_loop_iterations (loop, &max))
{
by factor VF. */
static void
-scale_profile_for_vect_loop (struct loop *loop, unsigned vf)
+scale_profile_for_vect_loop (class loop *loop, unsigned vf)
{
edge preheader = loop_preheader_edge (loop);
/* Reduce loop iterations by the vectorization factor. */
vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi, stmt_vec_info *seen_store)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
if (dump_enabled_p ())
stmts in the loop, and update the loop exit condition.
Returns scalar epilogue loop if any. */
-struct loop *
+class loop *
vect_transform_loop (loop_vec_info loop_vinfo)
{
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- struct loop *epilogue = NULL;
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ class loop *epilogue = NULL;
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
int i;
edge e = single_exit (loop);
if (! single_pred_p (e->dest))
{
- split_loop_exit_edge (e);
+ split_loop_exit_edge (e, true);
if (dump_enabled_p ())
dump_printf (MSG_NOTE, "split exit edge\n");
}
versioning_threshold);
check_profitability = false;
}
- vect_loop_versioning (loop_vinfo, th, check_profitability,
- versioning_threshold);
+ class loop *sloop
+ = vect_loop_versioning (loop_vinfo, th, check_profitability,
+ versioning_threshold);
+ sloop->force_vectorize = false;
check_profitability = false;
}
e = single_exit (LOOP_VINFO_SCALAR_LOOP (loop_vinfo));
if (! single_pred_p (e->dest))
{
- split_loop_exit_edge (e);
+ split_loop_exit_edge (e, true);
if (dump_enabled_p ())
dump_printf (MSG_NOTE, "split exit edge of scalar loop\n");
}
epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector,
&step_vector, &niters_vector_mult_vf, th,
check_profitability, niters_no_overflow);
+ if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo)
+ && LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo).initialized_p ())
+ scale_loop_frequencies (LOOP_VINFO_SCALAR_LOOP (loop_vinfo),
+ LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo));
if (niters_vector == NULL_TREE)
{
}
}
+ /* Loops vectorized with a variable factor won't benefit from
+ unrolling/peeling. */
+ if (!vf.is_constant ())
+ {
+ loop->unroll = 1;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to"
+ " variable-length vectorization factor\n");
+ }
/* Free SLP instances here because otherwise stmt reference counting
won't work. */
slp_instance instance;
if (epilogue)
{
auto_vector_sizes vector_sizes;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
unsigned int next_size = 0;
+ /* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work
+ on niters already ajusted for the iterations of the prologue. */
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0
&& known_eq (vf, lowest_vf))
{
- unsigned int eiters
+ unsigned HOST_WIDE_INT eiters
= (LOOP_VINFO_INT_NITERS (loop_vinfo)
- - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
- eiters = eiters % lowest_vf;
+ - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
+ eiters
+ = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
epilogue->nb_iterations_upper_bound = eiters - 1;
+ epilogue->any_upper_bound = true;
unsigned int ratio;
while (next_size < vector_sizes.length ()
*/
void
-optimize_mask_stores (struct loop *loop)
+optimize_mask_stores (class loop *loop)
{
basic_block *bbs = get_loop_body (loop);
unsigned nbbs = loop->num_nodes;
unsigned i;
basic_block bb;
- struct loop *bb_loop;
+ class loop *bb_loop;
gimple_stmt_iterator gsi;
gimple *stmt;
auto_vec<gimple *> worklist;
+ auto_purge_vect_location sentinel;
vect_location = find_loop_location (loop);
/* Pick up all masked stores in loop if any. */
add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION);
}
}
+
+/* Decide whether it is possible to use a zero-based induction variable
+ when vectorizing LOOP_VINFO with a fully-masked loop. If it is,
+ return the value that the induction variable must be able to hold
+ in order to ensure that the loop ends with an all-false mask.
+ Return -1 otherwise. */
+widest_int
+vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo)
+{
+ tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo);
+
+ /* Calculate the value that the induction variable must be able
+ to hit in order to ensure that we end the loop with an all-false mask.
+ This involves adding the maximum number of inactive trailing scalar
+ iterations. */
+ widest_int iv_limit = -1;
+ if (max_loop_iterations (loop, &iv_limit))
+ {
+ if (niters_skip)
+ {
+ /* Add the maximum number of skipped iterations to the
+ maximum iteration count. */
+ if (TREE_CODE (niters_skip) == INTEGER_CST)
+ iv_limit += wi::to_widest (niters_skip);
+ else
+ iv_limit += max_vf - 1;
+ }
+ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ /* Make a conservatively-correct assumption. */
+ iv_limit += max_vf - 1;
+
+ /* IV_LIMIT is the maximum number of latch iterations, which is also
+ the maximum in-range IV value. Round this value down to the previous
+ vector alignment boundary and then add an extra full iteration. */
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf;
+ }
+ return iv_limit;
+}
+