/* RTL-level loop invariant motion.
- Copyright (C) 2004-2015 Free Software Foundation, Inc.
+ Copyright (C) 2004-2019 Free Software Foundation, Inc.
This file is part of GCC.
#include "system.h"
#include "coretypes.h"
#include "backend.h"
-#include "cfghooks.h"
-#include "tree.h"
+#include "target.h"
#include "rtl.h"
+#include "tree.h"
+#include "cfghooks.h"
#include "df.h"
+#include "memmodel.h"
#include "tm_p.h"
+#include "insn-config.h"
+#include "regs.h"
+#include "ira.h"
+#include "recog.h"
#include "cfgrtl.h"
#include "cfgloop.h"
-#include "flags.h"
-#include "alias.h"
-#include "insn-config.h"
-#include "expmed.h"
-#include "dojump.h"
-#include "explow.h"
-#include "calls.h"
-#include "emit-rtl.h"
-#include "varasm.h"
-#include "stmt.h"
#include "expr.h"
-#include "recog.h"
-#include "target.h"
-#include "except.h"
#include "params.h"
-#include "regs.h"
-#include "ira.h"
+#include "rtl-iter.h"
#include "dumpfile.h"
/* The data stored for the loop. */
-struct loop_data
+class loop_data
{
- struct loop *outermost_exit; /* The outermost exit of the loop. */
+public:
+ class loop *outermost_exit; /* The outermost exit of the loop. */
bool has_call; /* True if the loop contains a call. */
/* Maximal register pressure inside loop for given register class
(defined only for the pressure classes). */
bitmap_head regs_live;
};
-#define LOOP_DATA(LOOP) ((struct loop_data *) (LOOP)->aux)
+#define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
/* The description of a use. */
/* The number of invariants which eqto this. */
unsigned eqno;
- /* If we moved the invariant out of the loop, the register that contains its
- value. */
- rtx reg;
-
/* If we moved the invariant out of the loop, the original regno
that contained its value. */
int orig_regno;
+ /* If we moved the invariant out of the loop, the register that contains its
+ value. */
+ rtx reg;
+
/* The definition of the invariant. */
struct def *def;
/* Cost of the invariant. */
unsigned cost;
- /* The invariants it depends on. */
- bitmap depends_on;
-
/* Used for detecting already visited invariants during determining
costs of movements. */
unsigned stamp;
+
+ /* The invariants it depends on. */
+ bitmap depends_on;
};
/* Currently processed loop. */
-static struct loop *curr_loop;
+static class loop *curr_loop;
/* Table of invariants indexed by the df_ref uid field. */
}
else if (fmt[i] == 'i' || fmt[i] == 'n')
val ^= XINT (x, i);
+ else if (fmt[i] == 'p')
+ val ^= constant_lower_bound (SUBREG_BYTE (x));
}
return val;
if (XINT (e1, i) != XINT (e2, i))
return false;
}
+ else if (fmt[i] == 'p')
+ {
+ if (maybe_ne (SUBREG_BYTE (e1), SUBREG_BYTE (e2)))
+ return false;
+ }
/* Unhandled type of subexpression, we fail conservatively. */
else
return false;
get_loop_body_in_dom_order. */
static void
-compute_always_reached (struct loop *loop, basic_block *body,
+compute_always_reached (class loop *loop, basic_block *body,
bitmap may_exit, bitmap always_reached)
{
unsigned i;
additionally mark blocks that may exit due to a call. */
static void
-find_exits (struct loop *loop, basic_block *body,
+find_exits (class loop *loop, basic_block *body,
bitmap may_exit, bitmap has_exit)
{
unsigned i;
edge_iterator ei;
edge e;
- struct loop *outermost_exit = loop, *aexit;
+ class loop *outermost_exit = loop, *aexit;
bool has_call = false;
rtx_insn *insn;
FOR_EACH_EDGE (e, ei, body[i]->succs)
{
- if (flow_bb_inside_loop_p (loop, e->dest))
- continue;
-
- bitmap_set_bit (may_exit, i);
- bitmap_set_bit (has_exit, i);
- outermost_exit = find_common_loop (outermost_exit,
- e->dest->loop_father);
+ if (! flow_bb_inside_loop_p (loop, e->dest))
+ {
+ bitmap_set_bit (may_exit, i);
+ bitmap_set_bit (has_exit, i);
+ outermost_exit = find_common_loop (outermost_exit,
+ e->dest->loop_father);
+ }
+ /* If we enter a subloop that might never terminate treat
+ it like a possible exit. */
+ if (flow_loop_nested_p (loop, e->dest->loop_father))
+ bitmap_set_bit (may_exit, i);
}
continue;
}
if (loop->aux == NULL)
{
- loop->aux = xcalloc (1, sizeof (struct loop_data));
+ loop->aux = xcalloc (1, sizeof (class loop_data));
bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
}
return (GET_MODE (x) != VOIDmode
&& GET_MODE (x) != BLKmode
&& can_copy_p (GET_MODE (x))
+ /* Do not mess with the frame pointer adjustments that can
+ be generated e.g. by expand_builtin_setjmp_receiver. */
+ && x != frame_pointer_rtx
&& (!REG_P (x)
|| !HARD_REGISTER_P (x)
|| REGNO_REG_CLASS (REGNO (x)) != NO_REGS));
BODY. */
static void
-find_defs (struct loop *loop)
+find_defs (class loop *loop)
{
if (dump_file)
{
loop->num);
}
- df_remove_problem (df_chain);
- df_process_deferred_rescans ();
df_chain_add_problem (DF_UD_CHAIN);
- df_live_add_problem ();
- df_live_set_all_dirty ();
df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
df_analyze_loop (loop);
check_invariant_table_size ();
return inv;
}
+/* Canonicalize the address expression X so that every multiplication is
+   spelled as MULT: an ASHIFT by a constant amount in the range
+   [0, bitsize of its scalar integer mode) is rewritten as a MULT by the
+   corresponding power of 2.
+
+   X is modified in place, so callers should pass a copy of the
+   expression they want to keep.  */
+
+static void
+canonicalize_address_mult (rtx x)
+{
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
+    {
+      rtx expr = *iter;
+      scalar_int_mode int_mode;
+
+      /* Only shifts performed in a scalar integer mode are rewritten.  */
+      if (GET_CODE (expr) != ASHIFT
+          || !is_a <scalar_int_mode> (GET_MODE (expr), &int_mode))
+        continue;
+
+      /* The shift amount has to be a constant that fits the mode.  */
+      rtx amount = XEXP (expr, 1);
+      if (!CONST_INT_P (amount)
+          || INTVAL (amount) < 0
+          || INTVAL (amount) >= GET_MODE_BITSIZE (int_mode))
+        continue;
+
+      HOST_WIDE_INT shift = INTVAL (amount);
+      PUT_CODE (expr, MULT);
+      XEXP (expr, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift, int_mode);
+      /* Skip the sub-rtxes of the node that was just rewritten.  */
+      iter.skip_subrtxes ();
+    }
+}
+
+/* Maximum number of sub expressions in address. We set it to
+ a small integer since it's unlikely to have a complicated
+ address expression. */
+
+#define MAX_CANON_ADDR_PARTS (5)
+
+/* Split address X into sub expressions, using PLUS as the separator,
+   and append each sub expression to the vector ADDR_PARTS.  */
+
+static void
+collect_address_parts (rtx x, vec<rtx> *addr_parts)
+{
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
+    {
+      rtx part = *iter;
+
+      /* A PLUS is only a separator: keep walking into its operands.  */
+      if (GET_CODE (part) == PLUS)
+        continue;
+
+      /* Anything else is recorded as a whole and not split further.  */
+      addr_parts->safe_push (part);
+      iter.skip_subrtxes ();
+    }
+}
+
+/* Comparison callback for sorting address sub expressions.  X and Y
+   point to the two rtx values being compared; the result orders them
+   by decreasing commutative operand precedence.  */
+
+static int
+compare_address_parts (const void *x, const void *y)
+{
+  rtx first = *(const rtx *) x;
+  rtx second = *(const rtx *) y;
+  int first_prec = commutative_operand_precedence (first);
+  int second_prec = commutative_operand_precedence (second);
+
+  /* Positive when Y has the higher precedence, so Y sorts before X.  */
+  return second_prec - first_prec;
+}
+
+/* Return a canonical version of address X, built by the following steps:
+     1) Rewrite ASHIFT into MULT recursively.
+     2) Divide the address into sub expressions with PLUS as the
+        separator.
+     3) Sort the sub expressions according to the precedence defined
+        for commutative operations.
+     4) Fold all CONST_INT_P sub expressions into a single constant.
+     5) Create the new canonicalized address and return it.
+   X itself is returned when it splits into fewer than two or more than
+   MAX_CANON_ADDR_PARTS sub expressions.
+   Callers should prepare a copy of X because this function may
+   modify it in place.  */
+
+static rtx
+canonicalize_address (rtx x)
+{
+  rtx res;
+  unsigned int i, j;
+  machine_mode mode = GET_MODE (x);
+  auto_vec<rtx, MAX_CANON_ADDR_PARTS> addr_parts;
+
+  /* Rewrite ASHIFT into MULT.  */
+  canonicalize_address_mult (x);
+  /* Divide address into sub expressions.  */
+  collect_address_parts (x, &addr_parts);
+  /* Unlikely to have very complicated address.  */
+  if (addr_parts.length () < 2
+      || addr_parts.length () > MAX_CANON_ADDR_PARTS)
+    return x;
+
+  /* Sort sub expressions according to canonicalization precedence.  */
+  addr_parts.qsort (compare_address_parts);
+
+  /* Find the first CONST_INT_P sub expression, if there is one.  */
+  for (i = 0; i < addr_parts.length (); i++)
+    if (CONST_INT_P (addr_parts[i]))
+      break;
+
+  /* Fold every later sub expression into it.  The assert checks that
+     all sub expressions after the first constant are constants too.  */
+  for (j = i + 1; j < addr_parts.length (); j++)
+    {
+      gcc_assert (CONST_INT_P (addr_parts[j]));
+      addr_parts[i] = simplify_gen_binary (PLUS, mode,
+                                           addr_parts[i],
+                                           addr_parts[j]);
+    }
+
+  /* If every sub expression was a constant, the folded sum already is
+     the whole address.  Falling through would start the chain from that
+     sum and then add the same sum once more, doubling the constant.  */
+  if (i == 0)
+    return addr_parts[0];
+
+  /* Chain PLUS operators to the left for !CONST_INT_P sub expressions.  */
+  res = addr_parts[0];
+  for (j = 1; j < i; j++)
+    res = simplify_gen_binary (PLUS, mode, res, addr_parts[j]);
+
+  /* Pick up the folded CONST_INT_P sub expression last.  */
+  if (i < addr_parts.length ())
+    res = simplify_gen_binary (PLUS, mode, res, addr_parts[i]);
+
+  return res;
+}
+
/* Given invariant DEF and its address USE, check if the corresponding
invariant expr can be propagated into the use or not. */
inv_can_prop_to_addr_use (struct def *def, df_ref use)
{
struct invariant *inv;
- rtx *pos = DF_REF_REAL_LOC (use), def_set;
+ rtx *pos = DF_REF_REAL_LOC (use), def_set, use_set;
rtx_insn *use_insn = DF_REF_INSN (use);
rtx_insn *def_insn;
bool ok;
validate_unshare_change (use_insn, pos, SET_SRC (def_set), true);
ok = verify_changes (0);
+ /* Try harder with canonicalization in address expression. */
+ if (!ok && (use_set = single_set (use_insn)) != NULL_RTX)
+ {
+ rtx src, dest, mem = NULL_RTX;
+
+ src = SET_SRC (use_set);
+ dest = SET_DEST (use_set);
+ if (MEM_P (src))
+ mem = src;
+ else if (MEM_P (dest))
+ mem = dest;
+
+ if (mem != NULL_RTX
+ && !memory_address_addr_space_p (GET_MODE (mem),
+ XEXP (mem, 0),
+ MEM_ADDR_SPACE (mem)))
+ {
+ rtx addr = canonicalize_address (copy_rtx (XEXP (mem, 0)));
+ if (memory_address_addr_space_p (GET_MODE (mem),
+ addr, MEM_ADDR_SPACE (mem)))
+ ok = true;
+ }
+ }
cancel_changes (0);
return ok;
}
return true;
}
-/* Pre-check candidate DEST to skip the one which can not make a valid insn
+/* Pre-check candidate DEST to skip the one which cannot make a valid insn
during move_invariant_reg. SIMPLE is to skip HARD_REGISTER. */
static bool
pre_check_invariant_p (bool simple, rtx dest)
ends due to a function call. */
static void
-find_invariants_body (struct loop *loop, basic_block *body,
+find_invariants_body (class loop *loop, basic_block *body,
bitmap always_reached, bitmap always_executed)
{
unsigned i;
/* Finds invariants in LOOP. */
static void
-find_invariants (struct loop *loop)
+find_invariants (class loop *loop)
{
- bitmap may_exit = BITMAP_ALLOC (NULL);
- bitmap always_reached = BITMAP_ALLOC (NULL);
- bitmap has_exit = BITMAP_ALLOC (NULL);
- bitmap always_executed = BITMAP_ALLOC (NULL);
+ auto_bitmap may_exit;
+ auto_bitmap always_reached;
+ auto_bitmap has_exit;
+ auto_bitmap always_executed;
basic_block *body = get_loop_body_in_dom_order (loop);
find_exits (loop, body, may_exit, has_exit);
find_invariants_body (loop, body, always_reached, always_executed);
merge_identical_invariants ();
- BITMAP_FREE (always_reached);
- BITMAP_FREE (always_executed);
- BITMAP_FREE (may_exit);
- BITMAP_FREE (has_exit);
free (body);
}
This usually has the effect that FP constant loads from the constant
pool are not moved out of the loop.
- Note that this also means that dependent invariants can not be moved.
+ Note that this also means that dependent invariants cannot be moved.
However, the primary purpose of this pass is to move loop invariant
address arithmetic out of loops, and address arithmetic that depends
on floating point constants is unlikely to ever occur. */
the block preceding its header. */
static bool
-can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg)
+can_move_invariant_reg (class loop *loop, struct invariant *inv, rtx reg)
{
df_ref def, use;
unsigned int dest_regno, defs_in_loop_count = 0;
otherwise. */
static bool
-move_invariant_reg (struct loop *loop, unsigned invno)
+move_invariant_reg (class loop *loop, unsigned invno)
{
struct invariant *inv = invariants[invno];
struct invariant *repr = invariants[inv->eqto];
in TEMPORARY_REGS. */
static void
-move_invariants (struct loop *loop)
+move_invariants (class loop *loop)
{
struct invariant *inv;
unsigned i;
GENERAL_REGS, NO_REGS, GENERAL_REGS);
}
}
+ /* Remove the DF_UD_CHAIN problem added in find_defs before rescanning,
+ to save a bit of compile time. */
+ df_remove_problem (df_chain);
+ df_process_deferred_rescans ();
}
/* Initializes invariant motion data. */
/* Move the invariants out of the LOOP. */
static void
-move_single_loop_invariants (struct loop *loop)
+move_single_loop_invariants (class loop *loop)
{
init_inv_motion_data ();
/* Releases the auxiliary data for LOOP. */
static void
-free_loop_data (struct loop *loop)
+free_loop_data (class loop *loop)
{
- struct loop_data *data = LOOP_DATA (loop);
+ class loop_data *data = LOOP_DATA (loop);
if (!data)
return;
static void
mark_regno_live (int regno)
{
- struct loop *loop;
+ class loop *loop;
for (loop = curr_loop;
loop != current_loops->tree_root;
code = GET_CODE (x);
if (code == REG)
{
- struct loop *loop;
+ class loop *loop;
for (loop = curr_loop;
loop != current_loops->tree_root;
basic_block bb;
rtx_insn *insn;
rtx link;
- struct loop *loop, *parent;
+ class loop *loop, *parent;
FOR_EACH_LOOP (loop, 0)
if (loop->aux == NULL)
{
- loop->aux = xcalloc (1, sizeof (struct loop_data));
+ loop->aux = xcalloc (1, sizeof (class loop_data));
bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
}
}
}
}
- bitmap_clear (&curr_regs_live);
+ bitmap_release (&curr_regs_live);
if (flag_ira_region == IRA_REGION_MIXED
|| flag_ira_region == IRA_REGION_ALL)
FOR_EACH_LOOP (loop, 0)
void
move_loop_invariants (void)
{
- struct loop *loop;
-
+ class loop *loop;
+
+ if (optimize == 1)
+ df_live_add_problem ();
+ /* ??? This is a hack. We should only need to call df_live_set_all_dirty
+ for optimize == 1, but can_move_invariant_reg relies on DF_INSN_LUID
+ being up-to-date. That isn't always true (even after df_analyze)
+ because df_process_deferred_rescans doesn't necessarily cause
+ blocks to be rescanned. */
+ df_live_set_all_dirty ();
if (flag_ira_loop_pressure)
{
df_analyze ();
invariant_table = NULL;
invariant_table_size = 0;
-#ifdef ENABLE_CHECKING
- verify_flow_info ();
-#endif
+ if (optimize == 1)
+ df_remove_problem (df_live);
+
+ checking_verify_flow_info ();
}