#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
+#include "dump-context.h"
+/* Initialize a SLP node. */
+
+_slp_tree::_slp_tree ()
+{
+ /* All scalar/vector stmt, op and def vectors start out empty.  */
+ SLP_TREE_SCALAR_STMTS (this) = vNULL;
+ SLP_TREE_SCALAR_OPS (this) = vNULL;
+ SLP_TREE_VEC_STMTS (this) = vNULL;
+ SLP_TREE_VEC_DEFS (this) = vNULL;
+ SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
+ SLP_TREE_CHILDREN (this) = vNULL;
+ SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
+ SLP_TREE_TWO_OPERATORS (this) = false;
+ /* Def type and vector type are filled in by the node creators.  */
+ SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
+ SLP_TREE_VECTYPE (this) = NULL_TREE;
+ SLP_TREE_REPRESENTATIVE (this) = NULL;
+ /* The creator owns the initial reference.  */
+ this->refcnt = 1;
+ this->max_nunits = 1;
+}
+
+/* Tear down a SLP node. */
+
+_slp_tree::~_slp_tree ()
+{
+ /* Release the vectors owned by the node.  The elements they
+    reference (stmt infos, trees, child nodes) are not freed here;
+    children are released by vect_free_slp_tree via the refcount.  */
+ SLP_TREE_CHILDREN (this).release ();
+ SLP_TREE_SCALAR_STMTS (this).release ();
+ SLP_TREE_SCALAR_OPS (this).release ();
+ SLP_TREE_VEC_STMTS (this).release ();
+ SLP_TREE_VEC_DEFS (this).release ();
+ SLP_TREE_LOAD_PERMUTATION (this).release ();
+}
+
/* Recursively free the memory allocated for the SLP tree rooted at NODE.
FINAL_P is true if we have vectorized the instance or if we have
made a final decision not to vectorize the statements in any way. */
}
}
- SLP_TREE_CHILDREN (node).release ();
- SLP_TREE_SCALAR_STMTS (node).release ();
- SLP_TREE_SCALAR_OPS (node).release ();
- SLP_TREE_VEC_STMTS (node).release ();
- SLP_TREE_LOAD_PERMUTATION (node).release ();
-
- free (node);
+ delete node;
}
/* Free the memory allocated for the SLP instance. FINAL_P is true if we
/* Create an SLP node for SCALAR_STMTS. */
static slp_tree
-vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
+vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
{
- slp_tree node;
- stmt_vec_info stmt_info = scalar_stmts[0];
- unsigned int nops;
-
- if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
- nops = gimple_call_num_args (stmt);
- else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
- {
- nops = gimple_num_ops (stmt) - 1;
- if (gimple_assign_rhs_code (stmt) == COND_EXPR)
- nops++;
- }
- else if (is_a <gphi *> (stmt_info->stmt))
- nops = 0;
- else
- return NULL;
-
- node = XNEW (struct _slp_tree);
+ slp_tree node = new _slp_tree;
SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
- SLP_TREE_SCALAR_OPS (node) = vNULL;
- SLP_TREE_VEC_STMTS (node).create (0);
- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
SLP_TREE_CHILDREN (node).create (nops);
- SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
- SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
- node->refcnt = 1;
- node->max_nunits = 1;
+ SLP_TREE_REPRESENTATIVE (node) = scalar_stmts[0];
unsigned i;
+ stmt_vec_info stmt_info;
FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
STMT_VINFO_NUM_SLP_USES (stmt_info)++;
static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
- slp_tree node;
-
- node = XNEW (struct _slp_tree);
- SLP_TREE_SCALAR_STMTS (node) = vNULL;
+ slp_tree node = new _slp_tree;
SLP_TREE_SCALAR_OPS (node) = ops;
- SLP_TREE_VEC_STMTS (node).create (0);
- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
- SLP_TREE_CHILDREN (node) = vNULL;
- SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
- SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_external_def;
- node->refcnt = 1;
- node->max_nunits = 1;
-
return node;
}
else
return NULL;
(*tree_size)++;
- node = vect_create_new_slp_node (stmts);
+ node = vect_create_new_slp_node (stmts, 0);
return node;
}
{
*max_nunits = this_max_nunits;
(*tree_size)++;
- node = vect_create_new_slp_node (stmts);
+ node = vect_create_new_slp_node (stmts, 0);
/* And compute the load permutation. Whether it is actually
a permutation depends on the unrolling factor which is
decided later. */
dump_printf_loc (MSG_NOTE, vect_location,
"Building vector operands from scalars\n");
this_tree_size++;
- child = vect_create_new_slp_node (oprnd_info->def_stmts);
+ child = vect_create_new_slp_node (oprnd_info->def_stmts, 0);
SLP_TREE_DEF_TYPE (child) = vect_external_def;
SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
children.safe_push (child);
*tree_size += this_tree_size + 1;
*max_nunits = this_max_nunits;
- node = vect_create_new_slp_node (stmts);
+ node = vect_create_new_slp_node (stmts, nops);
SLP_TREE_TWO_OPERATORS (node) = two_operators;
SLP_TREE_CHILDREN (node).splice (children);
return node;
}
-/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
+/* Dump a single SLP tree NODE. */
static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
- slp_tree node, hash_set<slp_tree> &visited)
+ slp_tree node)
{
unsigned i, j;
- stmt_vec_info stmt_info;
slp_tree child;
+ stmt_vec_info stmt_info;
tree op;
- if (visited.add (node))
- return;
-
dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
dump_user_location_t user_loc = loc.get_user_location ();
dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
dump_printf (dump_kind, " %p", (void *)child);
dump_printf (dump_kind, "\n");
+}
+
+/* Debug helper: dump the single SLP tree NODE (not its children),
+   callable from the debugger.  */
+
+DEBUG_FUNCTION void
+debug (slp_tree node)
+{
+ /* NOTE(review): debug_dump_context presumably redirects dump output
+    so MSG_NOTE printing is visible from the debugger — confirm against
+    dump-context.h.  */
+ debug_dump_context ctx;
+ vect_print_slp_tree (MSG_NOTE,
+ dump_location_t::from_location_t (UNKNOWN_LOCATION),
+ node);
+}
+
+/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
+
+static void
+vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
+ slp_tree node, hash_set<slp_tree> &visited)
+{
+ unsigned i;
+ slp_tree child;
+
+ /* The SLP graph can share nodes; dump each node only once.  */
+ if (visited.add (node))
+ return;
+
+ vect_print_slp_tree (dump_kind, loc, node);
+
+ /* Recurse into the children of this node.  */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_print_slp_tree (dump_kind, loc, child, visited);
+ vect_print_slp_graph (dump_kind, loc, child, visited);
}
+/* Dump the SLP graph rooted at ENTRY, visiting each node once.  */
+
static void
-vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
- slp_tree node)
+vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
+ slp_tree entry)
{
hash_set<slp_tree> visited;
- vect_print_slp_tree (dump_kind, loc, node, visited);
+ vect_print_slp_graph (dump_kind, loc, entry, visited);
}
/* Mark the tree rooted at NODE with PURE_SLP. */
if (existed_p)
return copy_ref;
- copy_ref = XNEW (_slp_tree);
+ copy_ref = new _slp_tree;
slp_tree copy = copy_ref;
- memcpy (copy, node, sizeof (_slp_tree));
+ SLP_TREE_DEF_TYPE (copy) = SLP_TREE_DEF_TYPE (node);
+ SLP_TREE_VECTYPE (copy) = SLP_TREE_VECTYPE (node);
+ SLP_TREE_REPRESENTATIVE (copy) = SLP_TREE_REPRESENTATIVE (node);
+ copy->max_nunits = node->max_nunits;
+ copy->refcnt = 0;
if (SLP_TREE_SCALAR_STMTS (node).exists ())
{
SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
if (SLP_TREE_CHILDREN (node).exists ())
SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
- copy->refcnt = 0;
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
if (SLP_TREE_SCALAR_STMTS (node).exists ())
{
gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
- /* ??? Computation nodes are isomorphic and need no rearrangement.
- This is a quick hack to cover those where rearrangement breaks
- semantics because only the first stmt is guaranteed to have the
- correct operation code due to others being swapped or inverted. */
- stmt_vec_info first = SLP_TREE_SCALAR_STMTS (node)[0];
- if (is_gimple_assign (first->stmt)
- && gimple_assign_rhs_code (first->stmt) == COND_EXPR)
- return;
vec<stmt_vec_info> tmp_stmts;
tmp_stmts.create (group_size);
tmp_stmts.quick_grow (group_size);
static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
- unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
unsigned int i, j;
unsigned int lidx;
slp_tree node, load;
+ if (SLP_INSTANCE_LOADS (slp_instn).is_empty ())
+ return false;
+
/* Compare all the permutation sequences to the first one. We know
that at least one load is permuted. */
node = SLP_INSTANCE_LOADS (slp_instn)[0];
- if (!node->load_permutation.exists ())
+ if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
return false;
+ unsigned int group_size = SLP_TREE_LOAD_PERMUTATION (node).length ();
for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
{
- if (!load->load_permutation.exists ())
+ if (!SLP_TREE_LOAD_PERMUTATION (load).exists ()
+ || SLP_TREE_LOAD_PERMUTATION (load).length () != group_size)
return false;
- FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
- if (lidx != node->load_permutation[j])
+ FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load), j, lidx)
+ if (lidx != SLP_TREE_LOAD_PERMUTATION (node)[j])
return false;
}
/* Gather loads in the SLP graph NODE and populate the INST loads array. */
static void
-vect_gather_slp_loads (slp_instance inst, slp_tree node,
+vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
hash_set<slp_tree> &visited)
{
if (visited.add (node))
stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
&& DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
- SLP_INSTANCE_LOADS (inst).safe_push (node);
+ loads.safe_push (node);
}
else
{
unsigned i;
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_gather_slp_loads (inst, child, visited);
+ vect_gather_slp_loads (loads, child, visited);
}
}
vect_gather_slp_loads (slp_instance inst, slp_tree node)
{
hash_set<slp_tree> visited;
- vect_gather_slp_loads (inst, node, visited);
-}
-
-/* Check if the required load permutations in the SLP instance
- SLP_INSTN are supported. */
-
-static bool
-vect_supported_load_permutation_p (vec_info *vinfo, slp_instance slp_instn)
-{
- unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
- unsigned int i, j, k, next;
- slp_tree node;
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
- FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
- if (node->load_permutation.exists ())
- FOR_EACH_VEC_ELT (node->load_permutation, j, next)
- dump_printf (MSG_NOTE, "%d ", next);
- else
- for (k = 0; k < group_size; ++k)
- dump_printf (MSG_NOTE, "%d ", k);
- dump_printf (MSG_NOTE, "\n");
- }
-
- /* In case of reduction every load permutation is allowed, since the order
- of the reduction statements is not important (as opposed to the case of
- grouped stores). The only condition we need to check is that all the
- load nodes are of the same size and have the same permutation (and then
- rearrange all the nodes of the SLP instance according to this
- permutation). */
-
- /* Check that all the load nodes are of the same size. */
- /* ??? Can't we assert this? */
- FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
- if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
- return false;
-
- node = SLP_INSTANCE_TREE (slp_instn);
- stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
-
- /* Reduction (there are no data-refs in the root).
- In reduction chain the order of the loads is not important. */
- if (!STMT_VINFO_DATA_REF (stmt_info)
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
- vect_attempt_slp_rearrange_stmts (slp_instn);
-
- /* In basic block vectorization we allow any subchain of an interleaving
- chain.
- FORNOW: not supported in loop SLP because of realignment compications. */
- if (is_a <bb_vec_info> (vinfo))
- {
- /* Check whether the loads in an instance form a subchain and thus
- no permutation is necessary. */
- FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
- {
- if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
- continue;
- bool subchain_p = true;
- stmt_vec_info next_load_info = NULL;
- stmt_vec_info load_info;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
- {
- if (j != 0
- && (next_load_info != load_info
- || DR_GROUP_GAP (load_info) != 1))
- {
- subchain_p = false;
- break;
- }
- next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
- }
- if (subchain_p)
- SLP_TREE_LOAD_PERMUTATION (node).release ();
- else
- {
- stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
- group_info = DR_GROUP_FIRST_ELEMENT (group_info);
- unsigned HOST_WIDE_INT nunits;
- unsigned k, maxk = 0;
- FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
- if (k > maxk)
- maxk = k;
- /* In BB vectorization we may not actually use a loaded vector
- accessing elements in excess of DR_GROUP_SIZE. */
- tree vectype = STMT_VINFO_VECTYPE (group_info);
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
- || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "BB vectorization with gaps at the end of "
- "a load is not supported\n");
- return false;
- }
-
- /* Verify the permutation can be generated. */
- vec<tree> tem;
- unsigned n_perms;
- if (!vect_transform_slp_perm_load (vinfo, node, tem, NULL,
- 1, true, &n_perms))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION,
- vect_location,
- "unsupported load permutation\n");
- return false;
- }
- }
- }
- return true;
- }
-
- /* For loop vectorization verify we can generate the permutation. Be
- conservative about the vectorization factor, there are permutations
- that will use three vector inputs only starting from a specific factor
- and the vectorization factor is not yet final.
- ??? The SLP instance unrolling factor might not be the maximum one. */
- unsigned n_perms;
- poly_uint64 test_vf
- = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
- LOOP_VINFO_VECT_FACTOR
- (as_a <loop_vec_info> (vinfo)));
- FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
- if (node->load_permutation.exists ()
- && !vect_transform_slp_perm_load (vinfo, node, vNULL, NULL, test_vf,
- true, &n_perms))
- return false;
-
- return true;
+ vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst), node, visited);
}
/* Create a new SLP instance. */
new_instance = XNEW (class _slp_instance);
SLP_INSTANCE_TREE (new_instance) = node;
- SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_LOADS (new_instance) = vNULL;
SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
"SLP size %u vs. limit %u.\n",
tree_size, max_tree_size);
- /* Compute the load permutation. */
+ /* Check whether any load is possibly permuted. */
slp_tree load_node;
bool loads_permuted = false;
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
continue;
unsigned j;
stmt_vec_info load_info;
- bool this_load_permuted = false;
- stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
- (SLP_TREE_SCALAR_STMTS (load_node)[0]);
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
{
- this_load_permuted = true;
+ loads_permuted = true;
break;
}
- if (!this_load_permuted
- /* The load requires permutation when unrolling exposes
- a gap either because the group is larger than the SLP
- group-size or because there is a gap between the groups. */
- && (known_eq (unrolling_factor, 1U)
- || (group_size == DR_GROUP_SIZE (first_stmt_info)
- && DR_GROUP_GAP (first_stmt_info) == 0)))
- {
- SLP_TREE_LOAD_PERMUTATION (load_node).release ();
- continue;
- }
- loads_permuted = true;
- }
-
- if (loads_permuted)
- {
- if (!vect_supported_load_permutation_p (vinfo, new_instance))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: unsupported load "
- "permutation %G", stmt_info->stmt);
- vect_free_slp_instance (new_instance, false);
- return false;
- }
}
- /* If the loads and stores can be handled with load/store-lan
+ /* If the loads and stores can be handled with load/store-lane
instructions do not generate this SLP instance. */
if (is_a <loop_vec_info> (vinfo)
&& loads_permuted
scalar_stmts.create (group_size);
for (unsigned i = 0; i < group_size; ++i)
scalar_stmts.quick_push (next_info);
- slp_tree conv = vect_create_new_slp_node (scalar_stmts);
+ slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
SLP_TREE_CHILDREN (conv).quick_push (node);
SLP_INSTANCE_TREE (new_instance) = conv;
/* We also have to fake this conversion stmt as SLP reduction
vinfo->slp_instances.safe_push (new_instance);
+ /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
+ the number of scalar stmts in the root in a few places.
+ Verify that assumption holds. */
+ gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
+ .length () == group_size);
+
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"Final SLP tree for instance:\n");
- vect_print_slp_tree (MSG_NOTE, vect_location,
- SLP_INSTANCE_TREE (new_instance));
+ vect_print_slp_graph (MSG_NOTE, vect_location,
+ SLP_INSTANCE_TREE (new_instance));
}
return true;
vect_free_slp_tree ((*it).second, false);
delete bst_map;
+ /* Optimize permutations in SLP reductions. */
+ slp_instance instance;
+ FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
+ {
+ slp_tree node = SLP_INSTANCE_TREE (instance);
+ stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
+ /* Reduction (there are no data-refs in the root).
+ In reduction chain the order of the loads is not important. */
+ if (!STMT_VINFO_DATA_REF (stmt_info)
+ && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+ vect_attempt_slp_rearrange_stmts (instance);
+ }
+
+ /* Gather all loads in the SLP graph. */
+ hash_set<slp_tree> visited;
+ FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
+ vect_gather_slp_loads (vinfo->slp_loads, SLP_INSTANCE_TREE (instance),
+ visited);
+
return opt_result::success ();
}
+/* Optimize the SLP graph of VINFO by eliding load permutations that
+   turn out to be unnecessary, releasing SLP_TREE_LOAD_PERMUTATION on
+   the affected load nodes.  */
+
+void
+vect_optimize_slp (vec_info *vinfo)
+{
+ slp_tree node;
+ unsigned i;
+ FOR_EACH_VEC_ELT (vinfo->slp_loads, i, node)
+ {
+ if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
+ continue;
+
+ /* In basic block vectorization we allow any subchain of an interleaving
+ chain.
+ FORNOW: not in loop SLP because of realignment complications. */
+ if (is_a <bb_vec_info> (vinfo))
+ {
+ /* Check whether the loads form a contiguous subchain of the
+    interleaving group (consecutive elements, no gaps).  */
+ bool subchain_p = true;
+ stmt_vec_info next_load_info = NULL;
+ stmt_vec_info load_info;
+ unsigned j;
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
+ {
+ if (j != 0
+ && (next_load_info != load_info
+ || DR_GROUP_GAP (load_info) != 1))
+ {
+ subchain_p = false;
+ break;
+ }
+ next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
+ }
+ if (subchain_p)
+ {
+ SLP_TREE_LOAD_PERMUTATION (node).release ();
+ continue;
+ }
+ }
+ else
+ {
+ stmt_vec_info load_info;
+ bool this_load_permuted = false;
+ unsigned j;
+ /* A permutation that maps every lane to itself is a no-op.  */
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
+ if (SLP_TREE_LOAD_PERMUTATION (node)[j] != j)
+ {
+ this_load_permuted = true;
+ break;
+ }
+ stmt_vec_info first_stmt_info
+ = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
+ if (!this_load_permuted
+ /* The load requires permutation when unrolling exposes
+ a gap either because the group is larger than the SLP
+ group-size or because there is a gap between the groups. */
+ && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a <loop_vec_info> (vinfo)), 1U)
+ || ((SLP_TREE_SCALAR_STMTS (node).length ()
+ == DR_GROUP_SIZE (first_stmt_info))
+ && DR_GROUP_GAP (first_stmt_info) == 0)))
+ {
+ SLP_TREE_LOAD_PERMUTATION (node).release ();
+ continue;
+ }
+ }
+ }
+}
+
/* For each possible SLP instance decide whether to SLP it and calculate overall
unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
slp_instance node_instance,
stmt_vector_for_cost *cost_vec)
{
- stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
+ stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
/* Calculate the number of vector statements to be created for the
vf = loop_vinfo->vectorization_factor;
else
vf = 1;
- unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
+ unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
SLP_TREE_NUMBER_OF_VEC_STMTS (node)
= vect_get_num_vectors (vf * group_size, vectype);
return true;
}
+/* Compute the prologue cost for invariant or constant operands represented
+ by NODE, recording entries into COST_VEC.  Uses SLP_TREE_VECTYPE and
+ SLP_TREE_NUMBER_OF_VEC_STMTS of NODE, so both must be set.  */
+
+static void
+vect_prologue_cost_for_slp (slp_tree node,
+ stmt_vector_for_cost *cost_vec)
+{
+ /* Without looking at the actual initializer a vector of
+ constants can be implemented as load from the constant pool.
+ When all elements are the same we can use a splat. */
+ tree vectype = SLP_TREE_VECTYPE (node);
+ unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
+ unsigned num_vects_to_check;
+ unsigned HOST_WIDE_INT const_nunits;
+ unsigned nelt_limit;
+ if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
+ && ! multiple_p (const_nunits, group_size))
+ {
+ /* Vectors do not repeat whole groups, so each generated vector
+    may have different contents and must be costed separately.  */
+ num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+ nelt_limit = const_nunits;
+ }
+ else
+ {
+ /* If either the vector has variable length or the vectors
+ are composed of repeated whole groups we only need to
+ cost construction once. All vectors will be the same. */
+ num_vects_to_check = 1;
+ nelt_limit = group_size;
+ }
+ tree elt = NULL_TREE;
+ unsigned nelt = 0;
+ for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
+ {
+ unsigned si = j % group_size;
+ if (nelt == 0)
+ elt = SLP_TREE_SCALAR_OPS (node)[si];
+ /* ??? We're just tracking whether all operands of a single
+ vector initializer are the same, ideally we'd check if
+ we emitted the same one already. */
+ else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
+ elt = NULL_TREE;
+ nelt++;
+ if (nelt == nelt_limit)
+ {
+ /* One cost entry per vector: a splat (scalar_to_vec) when all
+    elements agree, a general vec_construct for externals, and
+    a constant-pool load for constants.  */
+ record_stmt_cost (cost_vec, 1,
+ SLP_TREE_DEF_TYPE (node) == vect_external_def
+ ? (elt ? scalar_to_vec : vec_construct)
+ : vector_load,
+ NULL, vectype, 0, vect_prologue);
+ nelt = 0;
+ }
+ }
+}
+
/* Analyze statements contained in SLP tree NODE after recursively analyzing
the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
int i, j;
slp_tree child;
+ /* Assume we can code-generate all invariants. */
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return true;
if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
+ /* When the node can be vectorized cost invariant nodes it references.
+ This is not done in DFS order to allow the referring node
+ vectorizable_* calls to nail down the invariant nodes vector type
+ and possibly unshare it if it needs a different vector type than
+ other referrers. */
+ if (res)
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
+ if ((SLP_TREE_DEF_TYPE (child) == vect_constant_def
+ || SLP_TREE_DEF_TYPE (child) == vect_external_def)
+ /* Perform usual caching, note code-generation still
+ code-gens these nodes multiple times but we expect
+ to CSE them later. */
+ && !visited.contains (child)
+ && !lvisited.add (child))
+ {
+ /* ??? After auditing more code paths make a "default"
+ and push the vector type from NODE to all children
+ if it is not already set. */
+ /* Compute the number of vectors to be generated. */
+ tree vector_type = SLP_TREE_VECTYPE (child);
+ if (!vector_type)
+ {
+ /* For shifts with a scalar argument we don't need
+ to cost or code-generate anything.
+ ??? Represent this more explicitly. */
+ gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_SCALAR_STMTS (node)[0])
+ == shift_vec_info_type)
+ && j == 1);
+ continue;
+ }
+ unsigned group_size = SLP_TREE_SCALAR_OPS (child).length ();
+ poly_uint64 vf = 1;
+ if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+ vf = loop_vinfo->vectorization_factor;
+ SLP_TREE_NUMBER_OF_VEC_STMTS (child)
+ = vect_get_num_vectors (vf * group_size, vector_type);
+ /* And cost them. */
+ vect_prologue_cost_for_slp (child, cost_vec);
+ }
+
/* If this node can't be vectorized, try pruning the tree here rather
than felling the whole thing. */
if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
- res = true;
+ {
+ /* We'll need to revisit this for invariant costing and number
+ of vectorized stmt setting. */
+ lvisited.remove (node);
+ res = true;
+ }
return res;
}
FOR_EACH_VEC_ELT (slp_instances, i, instance)
{
auto_vec<bool, 20> life;
- life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
+ life.safe_grow_cleared
+ (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance)).length ());
vect_bb_slp_scalar_cost (bb_vinfo,
SLP_INSTANCE_TREE (instance),
&life, &scalar_costs, visited);
return false;
}
+ /* Optimize permutations. */
+ vect_optimize_slp (bb_vinfo);
+
vect_record_base_alignments (bb_vinfo);
/* Analyze and verify the alignment of data references and the
gimple_stmt_iterator gsi;
bool any_vectorized = false;
- gsi = gsi_start_bb (bb);
+ gsi = gsi_after_labels (bb);
while (!gsi_end_p (gsi))
{
gimple_stmt_iterator region_begin = gsi;
}
-/* Return 1 if vector type STMT_VINFO is a boolean vector. */
-
-static bool
-vect_mask_constant_operand_p (vec_info *vinfo,
- stmt_vec_info stmt_vinfo, unsigned op_num)
-{
- enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
- tree op, vectype;
- enum vect_def_type dt;
-
- /* For comparison and COND_EXPR type is chosen depending
- on the non-constant other comparison operand. */
- if (TREE_CODE_CLASS (code) == tcc_comparison)
- {
- gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
- op = gimple_assign_rhs1 (stmt);
-
- if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
- gcc_unreachable ();
-
- return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
- }
-
- if (code == COND_EXPR)
- {
- gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
- tree cond = gimple_assign_rhs1 (stmt);
-
- if (TREE_CODE (cond) == SSA_NAME)
- {
- if (op_num > 0)
- return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
- op = cond;
- }
- else
- {
- if (op_num > 1)
- return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
- op = TREE_OPERAND (cond, 0);
- }
-
- if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
- gcc_unreachable ();
-
- return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
- }
-
- return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
-}
-
/* Build a variable-length vector in which the elements in ELTS are repeated
to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
RESULTS and add any new instructions to SEQ.
}
-/* For constant and loop invariant defs of SLP_NODE this function returns
- (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
- OP_NODE determines the node for the operand containing the scalar
- operands. */
+/* For constant and loop invariant defs in OP_NODE this function creates
+ vector defs that will be used in the vectorized stmts and stores them
+ to SLP_TREE_VEC_DEFS of OP_NODE. */
static void
-vect_get_constant_vectors (vec_info *vinfo,
- slp_tree slp_node, unsigned op_num,
- vec<tree> *vec_oprnds)
+vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
{
- slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
- stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
unsigned HOST_WIDE_INT nunits;
tree vec_cst;
unsigned j, number_of_places_left_in_vector;
unsigned int vec_num, i;
unsigned number_of_copies = 1;
bool constant_p;
- tree neutral_op = NULL;
gimple_seq ctor_seq = NULL;
auto_vec<tree, 16> permute_results;
- /* ??? SLP analysis should compute the vector type for the
- constant / invariant and store it in the SLP node. */
- tree op = op_node->ops[0];
- /* Check if vector type is a boolean vector. */
- tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
- if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
- && vect_mask_constant_operand_p (vinfo, stmt_vinfo, op_num))
- vector_type = truth_type_for (stmt_vectype);
- else
- vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
-
- /* ??? For lane-reducing ops we should also have the required number
- of vector stmts initialized rather than second-guessing here. */
- unsigned int number_of_vectors;
- if (is_gimple_assign (stmt_vinfo->stmt)
- && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
- || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
- || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
- number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- else
- number_of_vectors
- = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
- * TYPE_VECTOR_SUBPARTS (stmt_vectype),
- vector_type);
- vec_oprnds->create (number_of_vectors);
+ /* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */
+ vector_type = SLP_TREE_VECTYPE (op_node);
+
+ unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (op_node);
+ SLP_TREE_VEC_DEFS (op_node).create (number_of_vectors);
auto_vec<tree> voprnds (number_of_vectors);
/* NUMBER_OF_COPIES is the number of times we need to use the same values in
constant_p = true;
tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits);
- bool place_after_defs = false;
+ stmt_vec_info insert_after = NULL;
for (j = 0; j < number_of_copies; j++)
{
+ tree op;
for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
{
/* Create 'vect_ = {op0,op1,...,opn}'. */
elts[number_of_places_left_in_vector] = op;
if (!CONSTANT_CLASS_P (op))
constant_p = false;
+ /* For BB vectorization we have to compute an insert location
+ when a def is inside the analyzed region since we cannot
+ simply insert at the BB start in this case. */
+ stmt_vec_info opdef;
if (TREE_CODE (orig_op) == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (orig_op)
&& is_a <bb_vec_info> (vinfo)
- && (as_a <bb_vec_info> (vinfo)->bb
- == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
- place_after_defs = true;
+ && (opdef = vinfo->lookup_def (orig_op)))
+ {
+ if (!insert_after)
+ insert_after = opdef;
+ else
+ insert_after = get_later_stmt (insert_after, opdef);
+ }
if (number_of_places_left_in_vector == 0)
{
vec_cst = permute_results[number_of_vectors - j - 1];
}
tree init;
- gimple_stmt_iterator gsi;
- if (place_after_defs)
+ if (insert_after)
{
- stmt_vec_info last_stmt_info
- = vect_find_last_scalar_stmt_in_slp (slp_node);
- gsi = gsi_for_stmt (last_stmt_info->stmt);
- init = vect_init_vector (vinfo, stmt_vinfo, vec_cst,
+ gimple_stmt_iterator gsi = gsi_for_stmt (insert_after->stmt);
+ /* vect_init_vector inserts before. */
+ gsi_next (&gsi);
+ init = vect_init_vector (vinfo, NULL, vec_cst,
vector_type, &gsi);
}
else
- init = vect_init_vector (vinfo, stmt_vinfo, vec_cst,
+ init = vect_init_vector (vinfo, NULL, vec_cst,
vector_type, NULL);
if (ctor_seq != NULL)
{
- gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
+ gimple_stmt_iterator gsi
+ = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
ctor_seq = NULL;
}
voprnds.quick_push (init);
- place_after_defs = false;
+ insert_after = NULL;
number_of_places_left_in_vector = nunits;
constant_p = true;
elts.new_vector (vector_type, nunits, 1);
for (j = vec_num; j != 0; j--)
{
vop = voprnds[j - 1];
- vec_oprnds->quick_push (vop);
+ SLP_TREE_VEC_DEFS (op_node).quick_push (vop);
}
/* In case that VF is greater than the unrolling factor needed for the SLP
group of stmts, NUMBER_OF_VECTORS to be created is greater than
NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
to replicate the vectors. */
- while (number_of_vectors > vec_oprnds->length ())
- {
- tree neutral_vec = NULL;
-
- if (neutral_op)
- {
- if (!neutral_vec)
- neutral_vec = build_vector_from_val (vector_type, neutral_op);
-
- vec_oprnds->quick_push (neutral_vec);
- }
- else
- {
- for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
- vec_oprnds->quick_push (vop);
- }
- }
+ while (number_of_vectors > SLP_TREE_VEC_DEFS (op_node).length ())
+ for (i = 0; SLP_TREE_VEC_DEFS (op_node).iterate (i, &vop) && i < vec_num;
+ i++)
+ SLP_TREE_VEC_DEFS (op_node).quick_push (vop);
}
}
-/* Get N vectorized definitions for SLP_NODE.
- If the scalar definitions are loop invariants or constants, collect them and
- call vect_get_constant_vectors() to create vector stmts.
- Otherwise, the def-stmts must be already vectorized and the vectorized stmts
- must be stored in the corresponding child of SLP_NODE, and we call
- vect_get_slp_vect_defs () to retrieve them. */
+/* Get N vectorized definitions for SLP_NODE. */
void
-vect_get_slp_defs (vec_info *vinfo,
+vect_get_slp_defs (vec_info *,
slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
{
if (n == -1U)
/* For each operand we check if it has vectorized definitions in a child
node or we need to create them (for invariants and constants). */
+ vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
- {
- vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
- vect_get_slp_vect_defs (child, &vec_defs);
- }
+ vect_get_slp_vect_defs (child, &vec_defs);
else
- vect_get_constant_vectors (vinfo, slp_node, i, &vec_defs);
+ vec_defs.splice (SLP_TREE_VEC_DEFS (child));
vec_oprnds->quick_push (vec_defs);
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
- permute statements for the SLP node NODE of the SLP instance
- SLP_NODE_INSTANCE. */
+ permute statements for the SLP node NODE. */
bool
vect_transform_slp_perm_load (vec_info *vinfo,
slp_tree node, slp_instance instance)
{
gimple_stmt_iterator si;
- stmt_vec_info stmt_info;
- unsigned int group_size;
- tree vectype;
int i, j;
slp_tree child;
- if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
- return;
-
/* See if we have already vectorized the node in the graph of the
SLP instance. */
- if (SLP_TREE_VEC_STMTS (node).exists ())
+ if ((SLP_TREE_DEF_TYPE (node) == vect_internal_def
+ && SLP_TREE_VEC_STMTS (node).exists ())
+ || SLP_TREE_VEC_DEFS (node).exists ())
return;
+ /* Vectorize externals and constants. */
+ if (SLP_TREE_DEF_TYPE (node) == vect_constant_def
+ || SLP_TREE_DEF_TYPE (node) == vect_external_def)
+ {
+ /* ??? vectorizable_shift can end up using a scalar operand which is
+ currently denoted as !SLP_TREE_VECTYPE. No need to vectorize the
+ node in this case. */
+ if (!SLP_TREE_VECTYPE (node))
+ return;
+
+ vect_create_constant_vectors (vinfo, node);
+ return;
+ }
+
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_schedule_slp_instance (vinfo, child, instance);
STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
}
- stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
+ stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
/* VECTYPE is the type of the destination. */
- vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+ unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));