/* Loop unroll-and-jam.
- Copyright (C) 2017-2019 Free Software Foundation, Inc.
+ Copyright (C) 2017-2021 Free Software Foundation, Inc.
This file is part of GCC.
#include "config.h"
#include "system.h"
#include "coretypes.h"
-#include "params.h"
#include "tree-pass.h"
#include "backend.h"
#include "tree.h"
#include "tree-data-ref.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-vectorizer.h"
+#include "tree-ssa-sccvn.h"
/* Unroll and Jam transformation
to the OLD loop or the outer loop of OLD now is inside LOOP. */
static void
-merge_loop_tree (struct loop *loop, struct loop *old)
+merge_loop_tree (class loop *loop, class loop *old)
{
basic_block *bbs;
int i, n;
- struct loop *subloop;
+ class loop *subloop;
edge e;
edge_iterator ei;
If so return true, otherwise return false. */
static bool
-unroll_jam_possible_p (struct loop *outer, struct loop *loop)
+unroll_jam_possible_p (class loop *outer, class loop *loop)
{
basic_block *bbs;
int i, n;
- struct tree_niter_desc niter;
+ class tree_niter_desc niter;
/* When fusing the loops we skip the latch block
of the first one, so it mustn't have any effects to
be in appropriate form. */
static void
-fuse_loops (struct loop *loop)
+fuse_loops (class loop *loop)
{
- struct loop *next = loop->next;
+ class loop *next = loop->next;
while (next)
{
merge_loop_tree (loop, next);
gcc_assert (!next->num_nodes);
- struct loop *ln = next->next;
+ class loop *ln = next->next;
delete_loop (next);
next = ln;
}
rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
}
+/* Return true if any of the access functions for dataref A
+ isn't invariant with respect to loop LOOP_NEST. */
+static bool
+any_access_function_variant_p (const struct data_reference *a,
+ const class loop *loop_nest)
+{
+ vec<tree> fns = DR_ACCESS_FNS (a);
+
+ for (tree t : fns)
+ if (!evolution_function_is_invariant_p (t, loop_nest->num))
+ return true;
+
+ return false;
+}
+
/* Returns true if the distance in DDR can be determined and adjusts
the unroll factor in *UNROLL to make unrolling valid for that distance.
- Otherwise return false.
+ Otherwise return false. DDR is with respect to the outer loop of INNER.
If this data dep can lead to a removed memory reference, increment
*REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
for this to happen. */
static bool
-adjust_unroll_factor (struct data_dependence_relation *ddr,
+adjust_unroll_factor (class loop *inner, struct data_dependence_relation *ddr,
unsigned *unroll, unsigned *profit_unroll,
unsigned *removed)
{
gcc_unreachable ();
else if ((unsigned)dist >= *unroll)
;
- else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
- || (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
- && dist > 0))
+ else if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
+ {
+ /* We have (a,0) with a < N, so this will be transformed into
+ (0,0) after unrolling by N. This might potentially be a
+ problem, if it's not a read-read dependency. */
+ if (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))
+ ;
+ else
+ {
+ /* So, at least one is a write, and we might reduce the
+ distance vector to (0,0). This is still no problem
+ if both data-refs are affine with respect to the inner
+ loops. But if one of them is invariant with respect
+ to an inner loop our reordering implicit in loop fusion
+ corrupts the program, as our data dependences don't
+ capture this. E.g. for:
+ for (0 <= i < n)
+ for (0 <= j < m)
+ a[i][0] = a[i+1][0] + 2; // (1)
+ b[i][j] = b[i+1][j] + 2; // (2)
+ the distance vector for both statements is (-1,0),
+ but exchanging the order for (2) is okay, while
+ for (1) it is not. To see this, write out the original
+ accesses (assume m is 2):
+ a i j original
+ 0 0 0 r a[1][0] b[1][0]
+ 1 0 0 w a[0][0] b[0][0]
+ 2 0 1 r a[1][0] b[1][1]
+ 3 0 1 w a[0][0] b[0][1]
+ 4 1 0 r a[2][0] b[2][0]
+ 5 1 0 w a[1][0] b[1][0]
+ after unroll-by-2 and fusion the accesses are done in
+ this order (from column a): 0,1, 4,5, 2,3, i.e. this:
+ a i j transformed
+ 0 0 0 r a[1][0] b[1][0]
+ 1 0 0 w a[0][0] b[0][0]
+ 4 1 0 r a[2][0] b[2][0]
+ 5 1 0 w a[1][0] b[1][0]
+ 2 0 1 r a[1][0] b[1][1]
+ 3 0 1 w a[0][0] b[0][1]
+ Note how access 2 accesses the same element as access 5
+ for array 'a' but not for array 'b'. */
+ if (any_access_function_variant_p (DDR_A (ddr), inner)
+ && any_access_function_variant_p (DDR_B (ddr), inner))
+ ;
+ else
+ /* And if any dataref of this pair is invariant with
+ respect to the inner loop, we have no chance than
+ to reduce the unroll factor. */
+ *unroll = dist;
+ }
+ }
+ else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
;
else
*unroll = dist;
static unsigned int
tree_loop_unroll_and_jam (void)
{
- struct loop *loop;
- bool changed = false;
+ unsigned int todo = 0;
gcc_assert (scev_initialized_p ());
/* Go through all innermost loops. */
- FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
+ for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
{
- struct loop *outer = loop_outer (loop);
+ class loop *outer = loop_outer (loop);
if (loop_depth (loop) < 2
|| optimize_loop_nest_for_size_p (outer))
if (!unroll_jam_possible_p (outer, loop))
continue;
- vec<data_reference_p> datarefs;
- vec<ddr_p> dependences;
+ vec<data_reference_p> datarefs = vNULL;
+ vec<ddr_p> dependences = vNULL;
unsigned unroll_factor, profit_unroll, removed;
- struct tree_niter_desc desc;
+ class tree_niter_desc desc;
bool unroll = false;
auto_vec<loop_p, 3> loop_nest;
- dependences.create (10);
- datarefs.create (10);
if (!compute_data_dependences_for_loop (outer, true, &loop_nest,
&datarefs, &dependences))
{
/* Now check the distance vector, for determining a sensible
outer unroll factor, and for validity of merging the inner
loop copies. */
- if (!adjust_unroll_factor (ddr, &unroll_factor, &profit_unroll,
+ if (!adjust_unroll_factor (loop, ddr, &unroll_factor, &profit_unroll,
&removed))
{
/* Couldn't get the distance vector. For two reads that's
/* We regard a user-specified minimum percentage of zero as a request
to ignore all profitability concerns and apply the transformation
always. */
- if (!PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
- profit_unroll = 2;
+ if (!param_unroll_jam_min_percent)
+ profit_unroll = MAX(2, profit_unroll);
else if (removed * 100 / datarefs.length ()
- < (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
+ < (unsigned)param_unroll_jam_min_percent)
profit_unroll = 1;
if (unroll_factor > profit_unroll)
unroll_factor = profit_unroll;
- if (unroll_factor > (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL))
- unroll_factor = PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL);
+ if (unroll_factor > (unsigned)param_unroll_jam_max_unroll)
+ unroll_factor = param_unroll_jam_max_unroll;
unroll = (unroll_factor > 1
&& can_unroll_loop_p (outer, unroll_factor, &desc));
&desc);
free_original_copy_tables ();
fuse_loops (outer->inner);
- changed = true;
+ todo |= TODO_cleanup_cfg;
+
+ auto_bitmap exit_bbs;
+ bitmap_set_bit (exit_bbs, single_dom_exit (outer)->dest->index);
+ todo |= do_rpo_vn (cfun, loop_preheader_edge (outer), exit_bbs);
}
loop_nest.release ();
free_data_refs (datarefs);
}
- if (changed)
+ if (todo)
{
scev_reset ();
free_dominance_info (CDI_DOMINATORS);
- return TODO_cleanup_cfg;
}
- return 0;
+ return todo;
}
/* Pass boilerplate */