/* Loop distribution.
- Copyright (C) 2006-2019 Free Software Foundation, Inc.
+ Copyright (C) 2006-2020 Free Software Foundation, Inc.
Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
and Sebastian Pop <sebastian.pop@amd.com>.
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
-#include "params.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"
#include "gimple-fold.h"
#define MAX_DATAREFS_NUM \
- ((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
+ ((unsigned) param_loop_max_datarefs_for_datadeps)
/* Threshold controlling number of distributed partitions. Given it may
be unnecessary if a memory stream cost model is invented in the future,
return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2));
}
-/* The loop (nest) to be distributed. */
-static vec<loop_p> loop_nest;
-/* Vector of data references in the loop to be distributed. */
-static vec<data_reference_p> datarefs_vec;
-/* If there is nonaddressable data reference in above vector. */
-static bool has_nonaddressable_dataref_p;
-
-/* Store index of data reference in aux field. */
#define DR_INDEX(dr) ((uintptr_t) (dr)->aux)
-/* Hash table for data dependence relation in the loop to be distributed. */
-static hash_table<ddr_hasher> *ddrs_table;
-
/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
struct rdg_vertex
{
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
+/* Kind of distributed loop. */
+enum partition_kind {
+ PKIND_NORMAL,
+ /* Partial memset stands for a partition that can be distributed into a loop
+ of memset calls, rather than a single memset call. It's handled just
+ like a normal partition, i.e., distributed as a separate loop; no memset
+ call is generated.
+
+ Note: This is a hacking fix trying to distribute ZERO-ing stmt in a
+ loop nest as deep as possible. As a result, parloop achieves better
+ parallelization by parallelizing deeper loop nest. This hack should
+ be unnecessary and removed once distributed memset can be understood
+ and analyzed in data reference analysis. See PR82604 for more. */
+ PKIND_PARTIAL_MEMSET,
+ PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
+};
+
+/* Type of distributed loop. */
+enum partition_type {
+ /* The distributed loop can be executed in parallel. */
+ PTYPE_PARALLEL = 0,
+ /* The distributed loop has to be executed sequentially. */
+ PTYPE_SEQUENTIAL
+};
+
+/* Builtin info for loop distribution. */
+struct builtin_info
+{
+ /* data-references a kind != PKIND_NORMAL partition is about. */
+ data_reference_p dst_dr;
+ data_reference_p src_dr;
+ /* Base address and size of memory objects operated by the builtin. Note
+ both dest and source memory objects must have the same size. */
+ tree dst_base;
+ tree src_base;
+ tree size;
+ /* Base and offset part of dst_base after stripping constant offset. This
+ is only used in memset builtin distribution for now. */
+ tree dst_base_base;
+ unsigned HOST_WIDE_INT dst_base_offset;
+};
+
+/* Partition for loop distribution. */
+struct partition
+{
+ /* Statements of the partition. */
+ bitmap stmts;
+ /* True if the partition defines variable which is used outside of loop. */
+ bool reduction_p;
+ location_t loc;
+ enum partition_kind kind;
+ enum partition_type type;
+ /* Data references in the partition. */
+ bitmap datarefs;
+ /* Information of builtin partition. */
+ struct builtin_info *builtin;
+};
+
+/* Partitions are fused because of different reasons. */
+enum fuse_type
+{
+ FUSE_NON_BUILTIN = 0,
+ FUSE_REDUCTION = 1,
+ FUSE_SHARE_REF = 2,
+ FUSE_SAME_SCC = 3,
+ FUSE_FINALIZE = 4
+};
+
+/* Descriptions of the different fusing reasons. */
+static const char *fuse_message[] = {
+ "they are non-builtins",
+ "they have reductions",
+ "they have shared memory refs",
+ "they are in the same dependence scc",
+ "there is no point to distribute loop"};
+
+
/* Dump vertex I in RDG to FILE. */
static void
}
}
-/* Build the vertices of the reduced dependence graph RDG. Return false
- if that failed. */
-static bool
-create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop)
+class loop_distribution
+{
+ private:
+ /* The loop (nest) to be distributed. */
+ vec<loop_p> loop_nest;
+
+ /* Vector of data references in the loop to be distributed. */
+ vec<data_reference_p> datarefs_vec;
+
+ /* If there is nonaddressable data reference in above vector. */
+ bool has_nonaddressable_dataref_p;
+
+ /* Store index of data reference in aux field. */
+
+ /* Hash table for data dependence relation in the loop to be distributed. */
+ hash_table<ddr_hasher> *ddrs_table;
+
+ /* Array mapping basic block's index to its topological order. */
+ int *bb_top_order_index;
+ /* And size of the array. */
+ int bb_top_order_index_size;
+
+ /* Build the vertices of the reduced dependence graph RDG. Return false
+ if that failed. */
+ bool create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop);
+
+ /* Initialize STMTS with all the statements of LOOP. We use topological
+ order to discover all statements. The order is important because
+ generate_loops_for_partition is using the same traversal for identifying
+ statements in loop copies. */
+ void stmts_from_loop (class loop *loop, vec<gimple *> *stmts);
+
+
+ /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
+ LOOP, and one edge per flow dependence or control dependence from control
+ dependence CD. During visiting each statement, data references are also
+ collected and recorded in global data DATAREFS_VEC. */
+ struct graph * build_rdg (class loop *loop, control_dependences *cd);
+
+/* Merge PARTITION into the partition DEST. RDG is the reduced dependence
+ graph and we update type for result partition if it is non-NULL. */
+ void partition_merge_into (struct graph *rdg,
+ partition *dest, partition *partition,
+ enum fuse_type ft);
+
+
+ /* Return data dependence relation for data references A and B. The two
+ data references must be in lexicographic order w.r.t. the reduced dependence
+ graph RDG. We firstly try to find ddr from global ddr hash table. If
+ it doesn't exist, compute the ddr and cache it. */
+ data_dependence_relation * get_data_dependence (struct graph *rdg,
+ data_reference_p a,
+ data_reference_p b);
+
+
+ /* In reduced dependence graph RDG for loop distribution, return true if
+ dependence between references DR1 and DR2 leads to a dependence cycle
+ and such dependence cycle can't be resolved by runtime alias check. */
+ bool data_dep_in_cycle_p (struct graph *rdg, data_reference_p dr1,
+ data_reference_p dr2);
+
+
+ /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
+ PARTITION1's type after merging PARTITION2 into PARTITION1. */
+ void update_type_for_merge (struct graph *rdg,
+ partition *partition1, partition *partition2);
+
+
+ /* Returns a partition with all the statements needed for computing
+ the vertex V of the RDG, also including the loop exit conditions. */
+ partition *build_rdg_partition_for_vertex (struct graph *rdg, int v);
+
+ /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
+ if it forms builtin memcpy or memmove call. */
+ void classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
+ data_reference_p dst_dr, data_reference_p src_dr);
+
+ /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
+ For the moment we detect memset, memcpy and memmove patterns. Bitmap
+ STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.
+ Returns true if there is a reduction in all partitions and we
+ possibly did not mark PARTITION as having one for this reason. */
+
+ bool
+ classify_partition (loop_p loop,
+ struct graph *rdg, partition *partition,
+ bitmap stmt_in_all_partitions);
+
+
+ /* Returns true when PARTITION1 and PARTITION2 access the same memory
+ object in RDG. */
+ bool share_memory_accesses (struct graph *rdg,
+ partition *partition1, partition *partition2);
+
+ /* For each seed statement in STARTING_STMTS, this function builds
+ partition for it by adding depended statements according to RDG.
+ All partitions are recorded in PARTITIONS. */
+ void rdg_build_partitions (struct graph *rdg,
+ vec<gimple *> starting_stmts,
+ vec<partition *> *partitions);
+
+ /* Compute partition dependence created by the data references in DRS1
+ and DRS2, modify and return DIR according to that. If ALIAS_DDR is
+ not NULL, we record dependence introduced by possible alias between
+ two data references in ALIAS_DDRS; otherwise, we simply ignore such
+ dependence as if it doesn't exist at all. */
+ int pg_add_dependence_edges (struct graph *rdg, int dir, bitmap drs1,
+ bitmap drs2, vec<ddr_p> *alias_ddrs);
+
+
+ /* Build and return partition dependence graph for PARTITIONS. RDG is
+ reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P
+ is true, data dependence caused by possible alias between references
+ is ignored, as if it doesn't exist at all; otherwise all dependences
+ are considered. */
+ struct graph *build_partition_graph (struct graph *rdg,
+ vec<struct partition *> *partitions,
+ bool ignore_alias_p);
+
+ /* Given reduced dependence graph RDG merge strong connected components
+ of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
+ possible alias between references is ignored, as if it doesn't exist
+ at all; otherwise all dependences are considered. */
+ void merge_dep_scc_partitions (struct graph *rdg, vec<struct partition *>
+ *partitions, bool ignore_alias_p);
+
+/* This is the main function breaking strongly connected components in
+ PARTITIONS giving reduced dependence graph RDG. Store data dependence
+ relations for runtime alias check in ALIAS_DDRS. */
+ void break_alias_scc_partitions (struct graph *rdg, vec<struct partition *>
+ *partitions, vec<ddr_p> *alias_ddrs);
+
+
+ /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
+ ALIAS_DDRS contains ddrs which need runtime alias check. */
+ void finalize_partitions (class loop *loop, vec<struct partition *>
+ *partitions, vec<ddr_p> *alias_ddrs);
+
+ /* Distributes the code from LOOP in such a way that producer statements
+ are placed before consumer statements. Tries to separate only the
+ statements from STMTS into separate loops. Returns the number of
+ distributed loops. Set NB_CALLS to number of generated builtin calls.
+ Set *DESTROY_P to whether LOOP needs to be destroyed. */
+ int distribute_loop (class loop *loop, vec<gimple *> stmts,
+ control_dependences *cd, int *nb_calls, bool *destroy_p,
+ bool only_patterns_p);
+
+ /* Compute topological order for basic blocks. Topological order is
+ needed because data dependence is computed for data references in
+ lexicographical order. */
+ void bb_top_order_init (void);
+
+ void bb_top_order_destroy (void);
+
+ public:
+
+ /* Getter for bb_top_order. */
+
+ inline int get_bb_top_order_index_size (void)
+ {
+ return bb_top_order_index_size;
+ }
+
+ inline int get_bb_top_order_index (int i)
+ {
+ return bb_top_order_index[i];
+ }
+
+ unsigned int execute (function *fun);
+};
+
+
+/* If X has a smaller topological sort number than Y, returns -1;
+ if greater, returns 1. */
+static int
+bb_top_order_cmp_r (const void *x, const void *y, void *loop)
+{
+ loop_distribution *_loop =
+ (loop_distribution *) loop;
+
+ basic_block bb1 = *(const basic_block *) x;
+ basic_block bb2 = *(const basic_block *) y;
+
+ int bb_top_order_index_size = _loop->get_bb_top_order_index_size ();
+
+ gcc_assert (bb1->index < bb_top_order_index_size
+ && bb2->index < bb_top_order_index_size);
+ gcc_assert (bb1 == bb2
+ || _loop->get_bb_top_order_index(bb1->index)
+ != _loop->get_bb_top_order_index(bb2->index));
+
+ return (_loop->get_bb_top_order_index(bb1->index) -
+ _loop->get_bb_top_order_index(bb2->index));
+}
+
+bool
+loop_distribution::create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts,
+ loop_p loop)
{
int i;
gimple *stmt;
return true;
}
-/* Array mapping basic block's index to its topological order. */
-static int *bb_top_order_index;
-/* And size of the array. */
-static int bb_top_order_index_size;
-
-/* If X has a smaller topological sort number than Y, returns -1;
- if greater, returns 1. */
-
-static int
-bb_top_order_cmp (const void *x, const void *y)
-{
- basic_block bb1 = *(const basic_block *) x;
- basic_block bb2 = *(const basic_block *) y;
-
- gcc_assert (bb1->index < bb_top_order_index_size
- && bb2->index < bb_top_order_index_size);
- gcc_assert (bb1 == bb2
- || bb_top_order_index[bb1->index]
- != bb_top_order_index[bb2->index]);
-
- return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]);
-}
-
-/* Initialize STMTS with all the statements of LOOP. We use topological
- order to discover all statements. The order is important because
- generate_loops_for_partition is using the same traversal for identifying
- statements in loop copies. */
-
-static void
-stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
+void
+loop_distribution::stmts_from_loop (class loop *loop, vec<gimple *> *stmts)
{
unsigned int i;
- basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this, bb_top_order_cmp_r);
for (i = 0; i < loop->num_nodes; i++)
{
free_graph (rdg);
}
-/* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
- LOOP, and one edge per flow dependence or control dependence from control
- dependence CD. During visiting each statement, data references are also
- collected and recorded in global data DATAREFS_VEC. */
-
-static struct graph *
-build_rdg (struct loop *loop, control_dependences *cd)
+struct graph *
+loop_distribution::build_rdg (class loop *loop, control_dependences *cd)
{
struct graph *rdg;
}
-/* Kind of distributed loop. */
-enum partition_kind {
- PKIND_NORMAL,
- /* Partial memset stands for a paritition can be distributed into a loop
- of memset calls, rather than a single memset call. It's handled just
- like a normal parition, i.e, distributed as separate loop, no memset
- call is generated.
-
- Note: This is a hacking fix trying to distribute ZERO-ing stmt in a
- loop nest as deep as possible. As a result, parloop achieves better
- parallelization by parallelizing deeper loop nest. This hack should
- be unnecessary and removed once distributed memset can be understood
- and analyzed in data reference analysis. See PR82604 for more. */
- PKIND_PARTIAL_MEMSET,
- PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
-};
-
-/* Type of distributed loop. */
-enum partition_type {
- /* The distributed loop can be executed parallelly. */
- PTYPE_PARALLEL = 0,
- /* The distributed loop has to be executed sequentially. */
- PTYPE_SEQUENTIAL
-};
-
-/* Builtin info for loop distribution. */
-struct builtin_info
-{
- /* data-references a kind != PKIND_NORMAL partition is about. */
- data_reference_p dst_dr;
- data_reference_p src_dr;
- /* Base address and size of memory objects operated by the builtin. Note
- both dest and source memory objects must have the same size. */
- tree dst_base;
- tree src_base;
- tree size;
- /* Base and offset part of dst_base after stripping constant offset. This
- is only used in memset builtin distribution for now. */
- tree dst_base_base;
- unsigned HOST_WIDE_INT dst_base_offset;
-};
-
-/* Partition for loop distribution. */
-struct partition
-{
- /* Statements of the partition. */
- bitmap stmts;
- /* True if the partition defines variable which is used outside of loop. */
- bool reduction_p;
- location_t loc;
- enum partition_kind kind;
- enum partition_type type;
- /* Data references in the partition. */
- bitmap datarefs;
- /* Information of builtin parition. */
- struct builtin_info *builtin;
-};
-
-
/* Allocate and initialize a partition from BITMAP. */
static partition *
return partition->reduction_p;
}
-/* Partitions are fused because of different reasons. */
-enum fuse_type
-{
- FUSE_NON_BUILTIN = 0,
- FUSE_REDUCTION = 1,
- FUSE_SHARE_REF = 2,
- FUSE_SAME_SCC = 3,
- FUSE_FINALIZE = 4
-};
-
-/* Description on different fusing reason. */
-static const char *fuse_message[] = {
- "they are non-builtins",
- "they have reductions",
- "they have shared memory refs",
- "they are in the same dependence scc",
- "there is no point to distribute loop"};
-
-static void
-update_type_for_merge (struct graph *, partition *, partition *);
-
-/* Merge PARTITION into the partition DEST. RDG is the reduced dependence
- graph and we update type for result partition if it is non-NULL. */
-
-static void
-partition_merge_into (struct graph *rdg, partition *dest,
- partition *partition, enum fuse_type ft)
+void
+loop_distribution::partition_merge_into (struct graph *rdg,
+ partition *dest, partition *partition, enum fuse_type ft)
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
/* Return a copy of LOOP placed before LOOP. */
-static struct loop *
-copy_loop_before (struct loop *loop)
+static class loop *
+copy_loop_before (class loop *loop)
{
- struct loop *res;
+ class loop *res;
edge preheader = loop_preheader_edge (loop);
initialize_original_copy_tables ();
/* Creates an empty basic block after LOOP. */
static void
-create_bb_after_loop (struct loop *loop)
+create_bb_after_loop (class loop *loop)
{
edge exit = single_exit (loop);
basic blocks of a loop are taken in dom order. */
static void
-generate_loops_for_partition (struct loop *loop, partition *partition,
+generate_loops_for_partition (class loop *loop, partition *partition,
bool copy_p)
{
unsigned i;
/* Generate a call to memset for PARTITION in LOOP. */
static void
-generate_memset_builtin (struct loop *loop, partition *partition)
+generate_memset_builtin (class loop *loop, partition *partition)
{
gimple_stmt_iterator gsi;
tree mem, fn, nb_bytes;
nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
- mem = builtin->dst_base;
+ mem = rewrite_to_non_trapping_overflow (builtin->dst_base);
mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
/* Generate a call to memcpy for PARTITION in LOOP. */
static void
-generate_memcpy_builtin (struct loop *loop, partition *partition)
+generate_memcpy_builtin (class loop *loop, partition *partition)
{
gimple_stmt_iterator gsi;
gimple *fn_call;
nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
- dest = builtin->dst_base;
- src = builtin->src_base;
+ dest = rewrite_to_non_trapping_overflow (builtin->dst_base);
+ src = rewrite_to_non_trapping_overflow (builtin->src_base);
if (partition->kind == PKIND_MEMCPY
|| ! ptr_derefs_may_alias_p (dest, src))
kind = BUILT_IN_MEMCPY;
/* Remove and destroy the loop LOOP. */
static void
-destroy_loop (struct loop *loop)
+destroy_loop (class loop *loop)
{
unsigned nbbs = loop->num_nodes;
edge exit = single_exit (loop);
/* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */
static bool
-generate_code_for_partition (struct loop *loop,
+generate_code_for_partition (class loop *loop,
partition *partition, bool copy_p)
{
switch (partition->kind)
return false;
}
-/* Return data dependence relation for data references A and B. The two
- data references must be in lexicographic order wrto reduced dependence
- graph RDG. We firstly try to find ddr from global ddr hash table. If
- it doesn't exist, compute the ddr and cache it. */
-
-static data_dependence_relation *
-get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
+data_dependence_relation *
+loop_distribution::get_data_dependence (struct graph *rdg, data_reference_p a,
+ data_reference_p b)
{
struct data_dependence_relation ent, **slot;
struct data_dependence_relation *ddr;
return *slot;
}
-/* In reduced dependence graph RDG for loop distribution, return true if
- dependence between references DR1 and DR2 leads to a dependence cycle
- and such dependence cycle can't be resolved by runtime alias check. */
-
-static bool
-data_dep_in_cycle_p (struct graph *rdg,
- data_reference_p dr1, data_reference_p dr2)
+bool
+loop_distribution::data_dep_in_cycle_p (struct graph *rdg,
+ data_reference_p dr1,
+ data_reference_p dr2)
{
struct data_dependence_relation *ddr;
return true;
}
-/* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
- PARTITION1's type after merging PARTITION2 into PARTITION1. */
-
-static void
-update_type_for_merge (struct graph *rdg,
- partition *partition1, partition *partition2)
+void
+loop_distribution::update_type_for_merge (struct graph *rdg,
+ partition *partition1,
+ partition *partition2)
{
unsigned i, j;
bitmap_iterator bi, bj;
}
}
-/* Returns a partition with all the statements needed for computing
- the vertex V of the RDG, also including the loop exit conditions. */
-
-static partition *
-build_rdg_partition_for_vertex (struct graph *rdg, int v)
+partition *
+loop_distribution::build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
partition *partition = partition_alloc ();
auto_vec<int, 3> nodes;
data references. */
static bool
-find_single_drs (struct loop *loop, struct graph *rdg, partition *partition,
+find_single_drs (class loop *loop, struct graph *rdg, partition *partition,
data_reference_p *dst_dr, data_reference_p *src_dr)
{
unsigned i;
{
location_t loc = gimple_location (DR_STMT (dr));
basic_block bb = gimple_bb (DR_STMT (dr));
- struct loop *loop = bb->loop_father;
+ class loop *loop = bb->loop_father;
tree ref = DR_REF (dr);
tree access_base = build_fold_addr_expr (ref);
tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
/* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
if it forms builtin memcpy or memmove call. */
-static void
-classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
- data_reference_p dst_dr, data_reference_p src_dr)
+void
+loop_distribution::classify_builtin_ldst (loop_p loop, struct graph *rdg,
+ partition *partition,
+ data_reference_p dst_dr,
+ data_reference_p src_dr)
{
tree base, size, src_base, src_size;
auto_vec<tree> dst_steps, src_steps;
return;
}
-/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
- For the moment we detect memset, memcpy and memmove patterns. Bitmap
- STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
-
-static void
-classify_partition (loop_p loop, struct graph *rdg, partition *partition,
- bitmap stmt_in_all_partitions)
+bool
+loop_distribution::classify_partition (loop_p loop,
+ struct graph *rdg, partition *partition,
+ bitmap stmt_in_all_partitions)
{
bitmap_iterator bi;
unsigned i;
to all partitions. In such case, reduction will be computed
correctly no matter how partitions are fused/distributed. */
if (!bitmap_bit_p (stmt_in_all_partitions, i))
- {
- partition->reduction_p = true;
- return;
- }
- has_reduction = true;
+ partition->reduction_p = true;
+ else
+ has_reduction = true;
}
}
+ /* Simple workaround to prevent classifying the partition as builtin
+ if it contains any use outside of loop. For the case where all
+ partitions have the reduction this simple workaround is delayed
+ to only affect the last partition. */
+ if (partition->reduction_p)
+ return has_reduction;
+
/* Perform general partition disqualification for builtins. */
if (volatiles_p
- /* Simple workaround to prevent classifying the partition as builtin
- if it contains any use outside of loop. */
- || has_reduction
|| !flag_tree_loop_distribute_patterns)
- return;
+ return has_reduction;
/* Find single load/store data references for builtin partition. */
if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld))
- return;
+ return has_reduction;
partition->loc = gimple_location (DR_STMT (single_st));
classify_builtin_st (loop, partition, single_st);
else
classify_builtin_ldst (loop, rdg, partition, single_st, single_ld);
+ return has_reduction;
}
-/* Returns true when PARTITION1 and PARTITION2 access the same memory
- object in RDG. */
-
-static bool
-share_memory_accesses (struct graph *rdg,
+bool
+loop_distribution::share_memory_accesses (struct graph *rdg,
partition *partition1, partition *partition2)
{
unsigned i, j;
partition for it by adding depended statements according to RDG.
All partitions are recorded in PARTITIONS. */
-static void
-rdg_build_partitions (struct graph *rdg,
- vec<gimple *> starting_stmts,
- vec<partition *> *partitions)
+void
+loop_distribution::rdg_build_partitions (struct graph *rdg,
+ vec<gimple *> starting_stmts,
+ vec<partition *> *partitions)
{
auto_bitmap processed;
int i;
return false;
}
-/* Compute partition dependence created by the data references in DRS1
- and DRS2, modify and return DIR according to that. IF ALIAS_DDR is
- not NULL, we record dependence introduced by possible alias between
- two data references in ALIAS_DDRS; otherwise, we simply ignore such
- dependence as if it doesn't exist at all. */
-
-static int
-pg_add_dependence_edges (struct graph *rdg, int dir,
+int
+loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir,
bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs)
{
unsigned i, j;
this_dir = -this_dir;
/* Known dependences can still be unordered througout the
- iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
+ iteration space, see gcc.dg/tree-ssa/ldist-16.c and
+ gcc.dg/tree-ssa/pr94969.c. */
if (DDR_NUM_DIST_VECTS (ddr) != 1)
this_dir = 2;
/* If the overlap is exact preserve stmt order. */
is ignored, as if it doesn't exist at all; otherwise all depdendences
are considered. */
-static struct graph *
-build_partition_graph (struct graph *rdg,
- vec<struct partition *> *partitions,
- bool ignore_alias_p)
+struct graph *
+loop_distribution::build_partition_graph (struct graph *rdg,
+ vec<struct partition *> *partitions,
+ bool ignore_alias_p)
{
int i, j;
struct partition *partition1, *partition2;
}
}
-/* Given reduced dependence graph RDG merge strong connected components
- of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
- possible alias between references is ignored, as if it doesn't exist
- at all; otherwise all depdendences are considered. */
-
-static void
-merge_dep_scc_partitions (struct graph *rdg,
- vec<struct partition *> *partitions,
- bool ignore_alias_p)
+void
+loop_distribution::merge_dep_scc_partitions (struct graph *rdg,
+ vec<struct partition *> *partitions,
+ bool ignore_alias_p)
{
struct partition *partition1, *partition2;
struct pg_vdata *data;
/* This is the main function breaking strong conected components in
PARTITIONS giving reduced depdendence graph RDG. Store data dependence
relations for runtime alias check in ALIAS_DDRS. */
-
-static void
-break_alias_scc_partitions (struct graph *rdg,
- vec<struct partition *> *partitions,
- vec<ddr_p> *alias_ddrs)
+void
+loop_distribution::break_alias_scc_partitions (struct graph *rdg,
+ vec<struct partition *> *partitions,
+ vec<ddr_p> *alias_ddrs)
{
int i, j, k, num_sccs, num_sccs_no_alias;
/* Build partition dependence graph. */
if (cbdata.vertices_component[k] != i)
continue;
- /* Update postorder number so that merged reduction partition is
- sorted after other partitions. */
- if (!partition_reduction_p (first)
- && partition_reduction_p (partition))
- {
- gcc_assert (pg->vertices[k].post < pg->vertices[j].post);
- pg->vertices[j].post = pg->vertices[k].post;
- }
+ /* Update to the minimal postorder number of vertices in the SCC so
+ that the merged partition is sorted correctly against others. */
+ if (pg->vertices[j].post > pg->vertices[k].post)
+ pg->vertices[j].post = pg->vertices[k].post;
+
partition_merge_into (NULL, first, partition, FUSE_SAME_SCC);
(*partitions)[k] = NULL;
partition_free (partition);
DR. */
static inline bool
-latch_dominated_by_data_ref (struct loop *loop, data_reference *dr)
+latch_dominated_by_data_ref (class loop *loop, data_reference *dr)
{
return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
gimple_bb (DR_STMT (dr)));
data dependence relations ALIAS_DDRS. */
static void
-compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
+compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs,
vec<dr_with_seg_len_pair_t> *comp_alias_pairs)
{
unsigned int i;
struct data_reference *dr_a = DDR_A (ddr);
struct data_reference *dr_b = DDR_B (ddr);
tree seg_length_a, seg_length_b;
- int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
- DR_BASE_ADDRESS (dr_b));
-
- if (comp_res == 0)
- comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
- gcc_assert (comp_res != 0);
if (latch_dominated_by_data_ref (loop, dr_a))
seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
dr_with_seg_len_pair_t dr_with_seg_len_pair
(dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
- dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b));
-
- /* Canonicalize pairs by sorting the two DR members. */
- if (comp_res > 0)
- std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
+ dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b),
+ /* ??? Would WELL_ORDERED be safe? */
+ dr_with_seg_len_pair_t::REORDERED);
comp_alias_pairs->safe_push (dr_with_seg_len_pair);
}
static void
version_loop_by_alias_check (vec<struct partition *> *partitions,
- struct loop *loop, vec<ddr_p> *alias_ddrs)
+ class loop *loop, vec<ddr_p> *alias_ddrs)
{
profile_probability prob;
basic_block cond_bb;
- struct loop *nloop;
+ class loop *nloop;
tree lhs, arg0, cond_expr = NULL_TREE;
gimple_seq cond_stmts = NULL;
gimple *call_stmt = NULL;
}
}
-/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
- ALIAS_DDRS contains ddrs which need runtime alias check. */
-
-static void
-finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
- vec<ddr_p> *alias_ddrs)
+void
+loop_distribution::finalize_partitions (class loop *loop,
+ vec<struct partition *> *partitions,
+ vec<ddr_p> *alias_ddrs)
{
unsigned i;
struct partition *partition, *a;
distributed loops. Set NB_CALLS to number of generated builtin calls.
Set *DESTROY_P to whether LOOP needs to be destroyed. */
-static int
-distribute_loop (struct loop *loop, vec<gimple *> stmts,
+int
+loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
control_dependences *cd, int *nb_calls, bool *destroy_p,
bool only_patterns_p)
{
ddrs_table = new hash_table<ddr_hasher> (389);
struct graph *rdg;
partition *partition;
- bool any_builtin;
int i, nbp;
*destroy_p = false;
for (i = 1; partitions.iterate (i, &partition); ++i)
bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);
- any_builtin = false;
+ bool any_builtin = false;
+ bool reduction_in_all = false;
FOR_EACH_VEC_ELT (partitions, i, partition)
{
- classify_partition (loop, rdg, partition, stmt_in_all_partitions);
+ reduction_in_all
+ |= classify_partition (loop, rdg, partition, stmt_in_all_partitions);
any_builtin |= partition_builtin_p (partition);
}
i--;
}
+ /* Put a non-builtin partition last if we need to preserve a reduction.
+ ??? This is a workaround that makes sort_partitions_by_post_order do
+ the correct thing while in reality it should sort each component
+ separately and then put the component with a reduction or a non-builtin
+ last. */
+ if (reduction_in_all
+ && partition_builtin_p (partitions.last()))
+ FOR_EACH_VEC_ELT (partitions, i, partition)
+ if (!partition_builtin_p (partition))
+ {
+ partitions.unordered_remove (i);
+ partitions.quick_push (partition);
+ break;
+ }
+
/* Build the partition dependency graph and fuse partitions in strong
connected component. */
if (partitions.length () > 1)
finalize_partitions (loop, &partitions, &alias_ddrs);
+ /* If there is a reduction in all partitions, make sure the last one
+ is not classified for builtin code generation. */
+ if (reduction_in_all)
+ {
+ partition = partitions.last ();
+ if (only_patterns_p
+ && partition_builtin_p (partition)
+ && !partition_builtin_p (partitions[0]))
+ {
+ nbp = 0;
+ goto ldist_done;
+ }
+ partition->kind = PKIND_NORMAL;
+ }
+
nbp = partitions.length ();
if (nbp == 0
|| (nbp == 1 && !partition_builtin_p (partitions[0]))
return nbp - *nb_calls;
}
-/* Distribute all loops in the current function. */
-
-namespace {
-const pass_data pass_data_loop_distribution =
+void loop_distribution::bb_top_order_init (void)
{
- GIMPLE_PASS, /* type */
- "ldist", /* name */
- OPTGROUP_LOOP, /* optinfo_flags */
- TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
- ( PROP_cfg | PROP_ssa ), /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0, /* todo_flags_finish */
-};
+ int rpo_num;
+ int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
-class pass_loop_distribution : public gimple_opt_pass
-{
-public:
- pass_loop_distribution (gcc::context *ctxt)
- : gimple_opt_pass (pass_data_loop_distribution, ctxt)
- {}
+ bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
+ bb_top_order_index_size = last_basic_block_for_fn (cfun);
+ rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
+ for (int i = 0; i < rpo_num; i++)
+ bb_top_order_index[rpo[i]] = i;
- /* opt_pass methods: */
- virtual bool gate (function *)
- {
- return flag_tree_loop_distribution
- || flag_tree_loop_distribute_patterns;
- }
-
- virtual unsigned int execute (function *);
+ free (rpo);
+}
-}; // class pass_loop_distribution
+void loop_distribution::bb_top_order_destroy ()
+{
+ free (bb_top_order_index);
+ bb_top_order_index = NULL;
+ bb_top_order_index_size = 0;
+}
/* Given LOOP, this function records seed statements for distribution in
WORK_LIST. Return false if there is nothing for distribution. */
static bool
-find_seed_stmts_for_distribution (struct loop *loop, vec<gimple *> *work_list)
+find_seed_stmts_for_distribution (class loop *loop, vec<gimple *> *work_list)
{
basic_block *bbs = get_loop_body_in_dom_order (loop);
/* Given innermost LOOP, return the outermost enclosing loop that forms a
perfect loop nest. */
-static struct loop *
-prepare_perfect_loop_nest (struct loop *loop)
+static class loop *
+prepare_perfect_loop_nest (class loop *loop)
{
- struct loop *outer = loop_outer (loop);
+ class loop *outer = loop_outer (loop);
tree niters = number_of_latch_executions (loop);
/* TODO: We only support the innermost 3-level loop nest distribution
return loop;
}
+
unsigned int
-pass_loop_distribution::execute (function *fun)
+loop_distribution::execute (function *fun)
{
- struct loop *loop;
+ class loop *loop;
bool changed = false;
basic_block bb;
control_dependences *cd = NULL;
if (number_of_loops (fun) <= 1)
return 0;
- /* Compute topological order for basic blocks. Topological order is
- needed because data dependence is computed for data references in
- lexicographical order. */
- if (bb_top_order_index == NULL)
- {
- int rpo_num;
- int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
-
- bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
- bb_top_order_index_size = last_basic_block_for_fn (cfun);
- rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
- for (int i = 0; i < rpo_num; i++)
- bb_top_order_index[rpo[i]] = i;
-
- free (rpo);
- }
+ bb_top_order_init ();
FOR_ALL_BB_FN (bb, fun)
{
delete cd;
if (bb_top_order_index != NULL)
- {
- free (bb_top_order_index);
- bb_top_order_index = NULL;
- bb_top_order_index_size = 0;
- }
+ bb_top_order_destroy ();
if (changed)
{
return changed ? TODO_cleanup_cfg : 0;
}
+
+/* Distribute all loops in the current function. */
+
+namespace {
+
+const pass_data pass_data_loop_distribution =
+{
+ GIMPLE_PASS, /* type */
+ "ldist", /* name */
+ OPTGROUP_LOOP, /* optinfo_flags */
+ TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
+ ( PROP_cfg | PROP_ssa ), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_loop_distribution : public gimple_opt_pass
+{
+public:
+ pass_loop_distribution (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_loop_distribution, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_tree_loop_distribution
+ || flag_tree_loop_distribute_patterns;
+ }
+
+ virtual unsigned int execute (function *);
+
+}; // class pass_loop_distribution
+
+unsigned int
+pass_loop_distribution::execute (function *fun)
+{
+ return loop_distribution ().execute (fun);
+}
+
} // anon namespace
gimple_opt_pass *