vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
SLP_INSTANCE_LOADS (instance).release ();
SLP_INSTANCE_ROOT_STMTS (instance).release ();
- SLP_INSTANCE_REMAIN_STMTS (instance).release ();
+ SLP_INSTANCE_REMAIN_DEFS (instance).release ();
instance->subgraph_entries.release ();
instance->cost_vec.release ();
free (instance);
slp_instance_kind kind,
vec<stmt_vec_info> &scalar_stmts,
vec<stmt_vec_info> &root_stmt_infos,
+ vec<tree> &remain,
unsigned max_tree_size, unsigned *limit,
scalar_stmts_to_slp_tree_map_t *bst_map,
/* ??? We need stmt_info for group splitting. */
??? Selecting the optimal set of lanes to vectorize would be nice
but SLP build for all lanes will fail quickly because we think
we're going to need unrolling. */
- auto_vec<stmt_vec_info> remain;
if (kind == slp_inst_kind_bb_reduc
&& (scalar_stmts.length () & 1))
- remain.safe_push (scalar_stmts.pop ());
+ remain.safe_insert (0, gimple_get_lhs (scalar_stmts.pop ()->stmt));
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_LOADS (new_instance) = vNULL;
SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
- if (!remain.is_empty ())
- SLP_INSTANCE_REMAIN_STMTS (new_instance) = remain.copy ();
- else
- SLP_INSTANCE_REMAIN_STMTS (new_instance) = vNULL;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
SLP_INSTANCE_KIND (new_instance) = kind;
new_instance->reduc_phis = NULL;
new_instance->cost_vec = vNULL;
gcc_unreachable ();
vec<stmt_vec_info> roots = vNULL;
+ vec<tree> remain = vNULL;
if (kind == slp_inst_kind_ctor)
{
roots.create (1);
}
/* Build the tree for the SLP instance. */
bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts,
- roots,
+ roots, remain,
max_tree_size, limit, bst_map,
kind == slp_inst_kind_store
? stmt_info : NULL);
if (vect_build_slp_instance (bb_vinfo, bb_vinfo->roots[i].kind,
bb_vinfo->roots[i].stmts,
bb_vinfo->roots[i].roots,
+ bb_vinfo->roots[i].remain,
max_tree_size, &limit, bst_map, NULL))
{
bb_vinfo->roots[i].stmts = vNULL;
bb_vinfo->roots[i].roots = vNULL;
+ bb_vinfo->roots[i].remain = vNULL;
}
}
}
{
roots[i].stmts.release ();
roots[i].roots.release ();
+ roots[i].remain.release ();
}
roots.release ();
}
|| !direct_internal_fn_supported_p (reduc_fn, vectype, OPTIMIZE_FOR_BOTH)
|| !useless_type_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
TREE_TYPE (vectype)))
- return false;
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: basic block reduction epilogue "
+ "operation unsupported.\n");
+ return false;
+ }
/* There's no way to cost a horizontal vector reduction via REDUC_FN so
cost log2 vector operations plus shuffles and one extraction. */
but record those to be handled in the epilogue. */
/* ??? For now do not allow mixing ops.  Operands that are not
vect_internal_def are counted and recorded as remaining defs. */
bool invalid = false;
+ unsigned remain_cnt = 0;
for (unsigned i = 0; i < chain.length (); ++i)
- if (chain[i].dt != vect_internal_def
- || chain[i].code != code)
- invalid = true;
- if (!invalid)
+ {
+ if (chain[i].code != code)
+ {
+ invalid = true;
+ break;
+ }
+ if (chain[i].dt != vect_internal_def)
+ remain_cnt++;
+ }
+ if (!invalid && chain.length () - remain_cnt > 1)
{
vec<stmt_vec_info> stmts;
+ vec<tree> remain = vNULL;
stmts.create (chain.length ());
+ if (remain_cnt > 0)
+ remain.create (remain_cnt);
for (unsigned i = 0; i < chain.length (); ++i)
- stmts.quick_push (bb_vinfo->lookup_def (chain[i].op));
+ {
+ if (chain[i].dt == vect_internal_def)
+ stmts.quick_push (bb_vinfo->lookup_def (chain[i].op));
+ else
+ remain.quick_push (chain[i].op);
+ }
vec<stmt_vec_info> roots;
roots.create (chain_stmts.length ());
for (unsigned i = 0; i < chain_stmts.length (); ++i)
roots.quick_push (bb_vinfo->lookup_stmt (chain_stmts[i]));
bb_vinfo->roots.safe_push (slp_root (slp_inst_kind_bb_reduc,
- stmts, roots));
+ stmts, roots, remain));
}
}
}
gcc_unreachable ();
tree scalar_def = gimple_build (&epilogue, as_combined_fn (reduc_fn),
TREE_TYPE (TREE_TYPE (vec_def)), vec_def);
- if (!SLP_INSTANCE_REMAIN_STMTS (instance).is_empty ())
+ if (!SLP_INSTANCE_REMAIN_DEFS (instance).is_empty ())
{
tree rem_def = NULL_TREE;
- for (auto rem : SLP_INSTANCE_REMAIN_STMTS (instance))
+ for (auto def : SLP_INSTANCE_REMAIN_DEFS (instance))
if (!rem_def)
- rem_def = gimple_get_lhs (rem->stmt);
+ rem_def = def;
else
rem_def = gimple_build (&epilogue, reduc_code,
TREE_TYPE (scalar_def),
- rem_def, gimple_get_lhs (rem->stmt));
+ rem_def, def);
scalar_def = gimple_build (&epilogue, reduc_code,
TREE_TYPE (scalar_def),
scalar_def, rem_def);
/* For slp_inst_kind_bb_reduc the defs that were not vectorized, NULL
otherwise. */
- vec<stmt_vec_info> remain_stmts;
+ vec<tree> remain_defs;
/* The unrolling factor required to vectorize this SLP instance. */
poly_uint64 unrolling_factor;
#define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
#define SLP_INSTANCE_LOADS(S) (S)->loads
#define SLP_INSTANCE_ROOT_STMTS(S) (S)->root_stmts
-#define SLP_INSTANCE_REMAIN_STMTS(S) (S)->remain_stmts
+#define SLP_INSTANCE_REMAIN_DEFS(S) (S)->remain_defs
#define SLP_INSTANCE_KIND(S) (S)->kind
#define SLP_TREE_CHILDREN(S) (S)->children
/* Describes one root from which an SLP instance may be built: the
   instance kind plus the vectors later handed to
   vect_build_slp_instance (scalar stmts, root stmts and remaining
   defs).  */
struct slp_root
{
/* Store KIND_, STMTS_, ROOTS_ and REMAIN_ in the members of the same
   name.  REMAIN_ may be omitted, in which case it is vNULL.  */
slp_root (slp_instance_kind kind_, vec<stmt_vec_info> stmts_,
- vec<stmt_vec_info> roots_)
- : kind(kind_), stmts(stmts_), roots(roots_) {}
+ vec<stmt_vec_info> roots_, vec<tree> remain_ = vNULL)
+ : kind(kind_), stmts(stmts_), roots(roots_), remain(remain_) {}
/* The kind of SLP instance to build from this root.  */
slp_instance_kind kind;
/* The scalar stmts passed as the lanes to build the SLP tree from.  */
vec<stmt_vec_info> stmts;
/* The root stmts passed along when building the instance.  */
vec<stmt_vec_info> roots;
/* Operands left out of STMTS; the visible slp_inst_kind_bb_reduc root
   discovery fills this with chain operands that are not
   vect_internal_def.  vNULL when there are none.  */
+ vec<tree> remain;
};
typedef class _bb_vec_info : public vec_info