/* The Blackfin code generation auxiliary output file.
- Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
Contributed by Analog Devices.
This file is part of GCC.
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
-#include "real.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "langhooks.h"
#include "bfin-protos.h"
#include "tm-preds.h"
+#include "tm-constrs.h"
#include "gt-bfin.h"
#include "basic-block.h"
#include "cfglayout.h"
/* A C structure for machine-specific, per-function data.
This is added to the cfun structure. */
-struct machine_function GTY(())
+struct GTY(()) machine_function
{
+ /* Set if we are notified by the doloop pass that a hardware loop
+ was created. */
int has_hardware_loops;
-};
-/* Test and compare insns in bfin.md store the information needed to
- generate branch and scc insns here. */
-rtx bfin_compare_op0, bfin_compare_op1;
+ /* Set if we create a memcpy pattern that uses loop registers. */
+ int has_loopreg_clobber;
+};
/* RTX for condition code flag register and RETS register */
extern GTY(()) rtx bfin_cc_rtx;
const char *byte_reg_names[] = BYTE_REGISTER_NAMES;
static int arg_regs[] = FUNCTION_ARG_REGISTERS;
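+/* The return-address register used for each kind of function (e_funkind);
+   this replaces the per-kind ternaries that previously selected RETX for
+   exception handlers, RETN for NMI handlers and RETI otherwise.  */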
+static int ret_regs[] = FUNCTION_RETURN_REGISTERS;
/* Nonzero if -mshared-library-id was given. */
static int bfin_lib_id_given;
static int bfin_flag_var_tracking;
/* -mcpu support */
-bfin_cpu_t bfin_cpu_type = DEFAULT_CPU_TYPE;
+bfin_cpu_t bfin_cpu_type = BFIN_CPU_UNKNOWN;
/* -msi-revision support. There are three special values:
-1 -msi-revision=none.
/* The workarounds enabled */
unsigned int bfin_workarounds = 0;
-static bool cputype_selected = false;
-
struct bfin_cpu
{
const char *name;
struct bfin_cpu bfin_cpus[] =
{
+ {"bf512", BFIN_CPU_BF512, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+
+ {"bf514", BFIN_CPU_BF514, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+
+ {"bf516", BFIN_CPU_BF516, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+
+ {"bf518", BFIN_CPU_BF518, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+
+ {"bf522", BFIN_CPU_BF522, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf522", BFIN_CPU_BF522, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
{"bf522", BFIN_CPU_BF522, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf523", BFIN_CPU_BF523, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf523", BFIN_CPU_BF523, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
{"bf523", BFIN_CPU_BF523, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf524", BFIN_CPU_BF524, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf524", BFIN_CPU_BF524, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
{"bf524", BFIN_CPU_BF524, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf525", BFIN_CPU_BF525, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf525", BFIN_CPU_BF525, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
{"bf525", BFIN_CPU_BF525, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf526", BFIN_CPU_BF526, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf526", BFIN_CPU_BF526, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
{"bf526", BFIN_CPU_BF526, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf527", BFIN_CPU_BF527, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf527", BFIN_CPU_BF527, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
{"bf527", BFIN_CPU_BF527, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf531", BFIN_CPU_BF531, 0x0006,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
{"bf531", BFIN_CPU_BF531, 0x0005,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315
+ | WA_LOAD_LCREGS | WA_05000074},
{"bf531", BFIN_CPU_BF531, 0x0004,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf531", BFIN_CPU_BF531, 0x0003,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf532", BFIN_CPU_BF532, 0x0006,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
{"bf532", BFIN_CPU_BF532, 0x0005,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315
+ | WA_LOAD_LCREGS | WA_05000074},
{"bf532", BFIN_CPU_BF532, 0x0004,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf532", BFIN_CPU_BF532, 0x0003,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf533", BFIN_CPU_BF533, 0x0006,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
{"bf533", BFIN_CPU_BF533, 0x0005,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315
+ | WA_LOAD_LCREGS | WA_05000074},
{"bf533", BFIN_CPU_BF533, 0x0004,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf533", BFIN_CPU_BF533, 0x0003,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf534", BFIN_CPU_BF534, 0x0003,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
{"bf534", BFIN_CPU_BF534, 0x0002,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf534", BFIN_CPU_BF534, 0x0001,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf536", BFIN_CPU_BF536, 0x0003,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
{"bf536", BFIN_CPU_BF536, 0x0002,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf536", BFIN_CPU_BF536, 0x0001,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf537", BFIN_CPU_BF537, 0x0003,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
{"bf537", BFIN_CPU_BF537, 0x0002,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf537", BFIN_CPU_BF537, 0x0001,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf538", BFIN_CPU_BF538, 0x0005,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
{"bf538", BFIN_CPU_BF538, 0x0004,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
{"bf538", BFIN_CPU_BF538, 0x0003,
- WA_SPECULATIVE_LOADS | WA_RETS},
-
+ WA_SPECULATIVE_LOADS | WA_RETS
+ | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074},
+ {"bf538", BFIN_CPU_BF538, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf539", BFIN_CPU_BF539, 0x0005,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
{"bf539", BFIN_CPU_BF539, 0x0004,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
{"bf539", BFIN_CPU_BF539, 0x0003,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS
+ | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074},
{"bf539", BFIN_CPU_BF539, 0x0002,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf542m", BFIN_CPU_BF542M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+
+ {"bf542", BFIN_CPU_BF542, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf542", BFIN_CPU_BF542, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
{"bf542", BFIN_CPU_BF542, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf544m", BFIN_CPU_BF544M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf544", BFIN_CPU_BF544, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf544", BFIN_CPU_BF544, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
{"bf544", BFIN_CPU_BF544, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf547m", BFIN_CPU_BF547M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+
+ {"bf547", BFIN_CPU_BF547, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf547", BFIN_CPU_BF547, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
{"bf547", BFIN_CPU_BF547, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf548m", BFIN_CPU_BF548M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf548", BFIN_CPU_BF548, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf548", BFIN_CPU_BF548, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
{"bf548", BFIN_CPU_BF548, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf549m", BFIN_CPU_BF549M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+
+ {"bf549", BFIN_CPU_BF549, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf549", BFIN_CPU_BF549, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
{"bf549", BFIN_CPU_BF549, 0x0000,
- WA_SPECULATIVE_LOADS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
- {"bf561", BFIN_CPU_BF561, 0x0005, WA_RETS},
+ {"bf561", BFIN_CPU_BF561, 0x0005, WA_RETS
+ | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074},
{"bf561", BFIN_CPU_BF561, 0x0003,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{"bf561", BFIN_CPU_BF561, 0x0002,
- WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS},
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
{NULL, 0, 0, 0}
};
-int splitting_for_sched;
+int splitting_for_sched, splitting_loops;
static void
bfin_globalize_label (FILE *stream, const char *name)
legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
{
rtx addr = orig;
- rtx new = orig;
+ rtx new_rtx = orig;
if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
{
}
tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
- new = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));
+ new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));
- emit_move_insn (reg, new);
+ emit_move_insn (reg, new_rtx);
if (picreg == pic_offset_table_rtx)
- current_function_uses_pic_offset_table = 1;
+ crtl->uses_pic_offset_table = 1;
return reg;
}
return gen_rtx_PLUS (Pmode, base, addr);
}
- return new;
+ return new_rtx;
}
\f
/* Stack frame layout. */
if (D_REGNO_P (regno))
{
bool is_eh_return_reg = false;
- if (current_function_calls_eh_return)
+ if (crtl->calls_eh_return)
{
unsigned j;
for (j = 0; ; j++)
return ((df_regs_ever_live_p (regno)
&& !fixed_regs[regno]
&& (is_inthandler || !call_used_regs[regno]))
+ || (is_inthandler
+ && (ENABLE_WA_05000283 || ENABLE_WA_05000315)
+ && regno == REG_P5)
|| (!TARGET_FDPIC
&& regno == PIC_OFFSET_TABLE_REGNUM
- && (current_function_uses_pic_offset_table
+ && (crtl->uses_pic_offset_table
|| (TARGET_ID_SHARED_LIBRARY && !current_function_is_leaf))));
}
else
static bool
must_save_fp_p (void)
{
- return frame_pointer_needed || df_regs_ever_live_p (REG_FP);
+ return df_regs_ever_live_p (REG_FP);
+}
+
+/* Determine if we are going to save the RETS register. */
+static bool
+must_save_rets_p (void)
+{
+ return df_regs_ever_live_p (REG_RETS);
}
static bool
{
/* EH return puts a new return address into the frame using an
address relative to the frame pointer. */
- if (current_function_calls_eh_return)
+ if (crtl->calls_eh_return)
return true;
return frame_pointer_needed;
}
if (saveall || is_inthandler)
{
rtx insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT));
+
RTX_FRAME_RELATED_P (insn) = 1;
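+      /* Also save the six hardware loop registers (LT, LC and LB pairs)
+	 if the function could change them; the LC registers are
+	 additionally saved when working around anomaly 05000257.  */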
+ for (dregno = REG_LT0; dregno <= REG_LB1; dregno++)
+ if (! current_function_is_leaf
+ || cfun->machine->has_hardware_loops
+ || cfun->machine->has_loopreg_clobber
+ || (ENABLE_WA_05000257
+ && (dregno == REG_LC0 || dregno == REG_LC1)))
+ {
+ insn = emit_move_insn (predec, gen_rtx_REG (SImode, dregno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
if (total_consec != 0)
RTX_FRAME_RELATED_P (insn) = 1;
}
if (saveall || is_inthandler)
- emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc);
+ {
+ for (regno = REG_LB1; regno >= REG_LT0; regno--)
+ if (! current_function_is_leaf
+ || cfun->machine->has_hardware_loops
+ || cfun->machine->has_loopreg_clobber
+ || (ENABLE_WA_05000257 && (regno == REG_LC0 || regno == REG_LC1)))
+ emit_move_insn (gen_rtx_REG (SImode, regno), postinc);
+
+ emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc);
+ }
}
/* Perform any actions needed for a function that is receiving a
Zero means the frame pointer need not be set up (and parms may
be accessed via the stack pointer) in functions that seem suitable. */
-int
+static bool
bfin_frame_pointer_required (void)
{
e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
if (fkind != SUBROUTINE)
- return 1;
+ return true;
/* We turn on -fomit-frame-pointer if -momit-leaf-frame-pointer is used,
so we have to override it for non-leaf functions. */
if (TARGET_OMIT_LEAF_FRAME_POINTER && ! current_function_is_leaf)
- return 1;
+ return true;
- return 0;
+ return false;
}
/* Return the number of registers pushed during the prologue. */
int i;
if (all || stack_frame_needed_p ())
- /* We use a LINK instruction in this case. */
n += 2;
else
{
if (must_save_fp_p ())
n++;
- if (! current_function_is_leaf)
+ if (must_save_rets_p ())
n++;
}
if (fkind != SUBROUTINE || all)
- /* Increment once for ASTAT. */
- n++;
+ {
+ /* Increment once for ASTAT. */
+ n++;
+ if (! current_function_is_leaf
+ || cfun->machine->has_hardware_loops
+ || cfun->machine->has_loopreg_clobber)
+ {
+ n += 6;
+ }
+ }
if (fkind != SUBROUTINE)
{
return n;
}
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. Frame pointer elimination is automatically handled.
+
+ All other eliminations are valid. */
+
+static bool
+bfin_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
+}
+
/* Return the offset between two registers, one to be eliminated, and the other
its replacement, at the start of a routine. */
if (to == STACK_POINTER_REGNUM)
{
- if (current_function_outgoing_args_size >= FIXED_STACK_AREA)
- offset += current_function_outgoing_args_size;
- else if (current_function_outgoing_args_size)
+ if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
+ offset += crtl->outgoing_args_size;
+ else if (crtl->outgoing_args_size)
offset += FIXED_STACK_AREA;
offset += get_frame_size ();
if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
|| (!TARGET_FDPIC
&& i == PIC_OFFSET_TABLE_REGNUM
- && (current_function_uses_pic_offset_table
+ && (crtl->uses_pic_offset_table
|| (TARGET_ID_SHARED_LIBRARY
&& ! current_function_is_leaf))))
break;
static HOST_WIDE_INT
arg_area_size (void)
{
- if (current_function_outgoing_args_size)
+ if (crtl->outgoing_args_size)
{
- if (current_function_outgoing_args_size >= FIXED_STACK_AREA)
- return current_function_outgoing_args_size;
+ if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
+ return crtl->outgoing_args_size;
else
return FIXED_STACK_AREA;
}
{
frame_size += arg_area_size ();
- if (all || stack_frame_needed_p ()
- || (must_save_fp_p () && ! current_function_is_leaf))
+ if (all
+ || stack_frame_needed_p ()
+ || (must_save_rets_p () && must_save_fp_p ()))
emit_link_insn (spreg, frame_size);
else
{
- if (! current_function_is_leaf)
+ if (must_save_rets_p ())
{
rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
gen_rtx_PRE_DEC (Pmode, spreg)),
{
frame_size += arg_area_size ();
- if (all || stack_frame_needed_p ())
+ if (stack_frame_needed_p ())
emit_insn (gen_unlink ());
else
{
rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg));
add_to_reg (spreg, frame_size, 0, epilogue_p);
- if (must_save_fp_p ())
+ if (all || must_save_fp_p ())
{
rtx fpreg = gen_rtx_REG (Pmode, REG_FP);
emit_move_insn (fpreg, postinc);
- emit_insn (gen_rtx_USE (VOIDmode, fpreg));
+ emit_use (fpreg);
}
- if (! current_function_is_leaf)
+ if (all || must_save_rets_p ())
{
emit_move_insn (bfin_rets_rtx, postinc);
- emit_insn (gen_rtx_USE (VOIDmode, bfin_rets_rtx));
+ emit_use (bfin_rets_rtx);
}
}
}
all = true;
expand_prologue_reg_save (spreg, all, true);
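+  /* Work around anomalies 05000283 and 05000315 by performing a dummy
+     conditional read of a system MMR (the CHIPID register) at the start
+     of the interrupt handler.  */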
+ if (ENABLE_WA_05000283 || ENABLE_WA_05000315)
+ {
+ rtx chipid = GEN_INT (trunc_int_for_mode (0xFFC00014, SImode));
+ rtx p5reg = gen_rtx_REG (Pmode, REG_P5);
+ emit_insn (gen_movbi (bfin_cc_rtx, const1_rtx));
+ emit_insn (gen_movsi_high (p5reg, chipid));
+ emit_insn (gen_movsi_low (p5reg, p5reg, chipid));
+ emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx));
+ }
+
if (lookup_attribute ("nesting", attrs))
{
- rtx srcreg = gen_rtx_REG (Pmode, (fkind == EXCPT_HANDLER ? REG_RETX
- : fkind == NMI_HANDLER ? REG_RETN
- : REG_RETI));
+ rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
insn = emit_move_insn (predec, srcreg);
RTX_FRAME_RELATED_P (insn) = 1;
}
if (lookup_attribute ("nesting", attrs))
{
- rtx srcreg = gen_rtx_REG (Pmode, (fkind == EXCPT_HANDLER ? REG_RETX
- : fkind == NMI_HANDLER ? REG_RETN
- : REG_RETI));
+ rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
emit_move_insn (srcreg, postinc);
}
if (fkind == EXCPT_HANDLER)
emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (12)));
- emit_jump_insn (gen_return_internal (GEN_INT (fkind)));
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, ret_regs[fkind])));
}
/* Used while emitting the prologue to generate code to load the correct value
struct cgraph_local_info *i = NULL;
rtx addr, insn;
- if (flag_unit_at_a_time)
- i = cgraph_local_info (current_function_decl);
+ i = cgraph_local_info (current_function_decl);
/* Functions local to the translation unit don't need to reload the
pic reg, since the caller always passes a usable one. */
return;
}
- if (current_function_limit_stack
- || TARGET_STACK_CHECK_L1)
+ if (crtl->limit_stack
+ || (TARGET_STACK_CHECK_L1
+ && !DECL_NO_LIMIT_STACK (current_function_decl)))
{
HOST_WIDE_INT offset
= bfin_initial_elimination_offset (ARG_POINTER_REGNUM,
STACK_POINTER_REGNUM);
- rtx lim = current_function_limit_stack ? stack_limit_rtx : NULL_RTX;
+ rtx lim = crtl->limit_stack ? stack_limit_rtx : NULL_RTX;
rtx p2reg = gen_rtx_REG (Pmode, REG_P2);
if (!lim)
}
expand_prologue_reg_save (spreg, all, false);
- do_link (spreg, frame_size, false);
+ do_link (spreg, frame_size, all);
if (TARGET_ID_SHARED_LIBRARY
&& !TARGET_SEP_DATA
- && (current_function_uses_pic_offset_table
+ && (crtl->uses_pic_offset_table
|| !current_function_is_leaf))
bfin_load_pic_reg (pic_offset_table_rtx);
}
return;
}
- do_unlink (spreg, get_frame_size (), false, e);
+ do_unlink (spreg, get_frame_size (), all, e);
expand_epilogue_reg_restore (spreg, all, false);
if (eh_return)
emit_insn (gen_addsi3 (spreg, spreg, gen_rtx_REG (Pmode, REG_P2)));
- emit_jump_insn (gen_return_internal (GEN_INT (SUBROUTINE)));
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, REG_RETS)));
}
\f
/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
return get_hard_reg_initial_val (Pmode, REG_RETS);
}
-/* Try machine-dependent ways of modifying an illegitimate address X
- to be legitimate. If we find one, return the new, valid address,
- otherwise return NULL_RTX.
-
- OLDX is the address as it was before break_out_memory_refs was called.
- In some cases it is useful to look at this to decide what needs to be done.
-
- MODE is the mode of the memory reference. */
-
-rtx
-legitimize_address (rtx x ATTRIBUTE_UNUSED, rtx oldx ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED)
-{
- return NULL_RTX;
-}
-
static rtx
bfin_delegitimize_address (rtx orig_x)
{
All addressing modes are equally cheap on the Blackfin. */
static int
-bfin_address_cost (rtx addr ATTRIBUTE_UNUSED)
+bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
return 1;
}
/* Decide whether a type should be returned in memory (true)
   or in a register (false).  This implements the target hook
- RETURN_IN_MEMORY. */
+ TARGET_RETURN_IN_MEMORY. */
-int
-bfin_return_in_memory (const_tree type)
+static bool
+bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
int size = int_size_in_bytes (type);
return size > 2 * UNITS_PER_WORD || size == -1;
bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
tree exp ATTRIBUTE_UNUSED)
{
+ struct cgraph_local_info *this_func, *called_func;
e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
if (fkind != SUBROUTINE)
return false;
   not need to reload P5 in the prologue, but the sibcall will pop P5 in the
sibcall epilogue, and we end up with the wrong value in P5. */
- if (!flag_unit_at_a_time || decl == NULL)
+ if (!decl)
/* Not enough information. */
return false;
-
- {
- struct cgraph_local_info *this_func, *called_func;
- this_func = cgraph_local_info (current_function_decl);
- called_func = cgraph_local_info (decl);
- return !called_func->local || this_func->local;
- }
+ this_func = cgraph_local_info (current_function_decl);
+ called_func = cgraph_local_info (decl);
+ return !called_func->local || this_func->local;
}
\f
+/* Write a template for a trampoline to F. */
+
+static void
+bfin_asm_trampoline_template (FILE *f)
+{
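+  /* The template loads the target address into P1 and the static chain
+     into P2, then jumps through P1.  Under FDPIC the first eight bytes
+     serve as descriptor slots, and P1 initially holds the address of the
+     target's function descriptor, from which the real entry point (into
+     P1) and the GOT pointer (into P3) are loaded.  */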
+ if (TARGET_FDPIC)
+ {
+ fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */
+ fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */
+ fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */
+ fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */
+ fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */
+ fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */
+ fprintf (f, "\t.dw\t0xac4b\n"); /* p3 = [p1 + 4] */
+ fprintf (f, "\t.dw\t0x9149\n"); /* p1 = [p1] */
+ fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1) */
+ }
+ else
+ {
+ fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */
+ fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */
+ fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */
+ fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */
+ fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1) */
+ }
+}
+
/* Emit RTL insns to initialize the variable parts of a trampoline at
- TRAMP. FNADDR is an RTX for the address of the function's pure
- code. CXT is an RTX for the static chain value for the function. */
+ M_TRAMP. FNDECL is the target function. CHAIN_VALUE is an RTX for
+ the static chain value for the function. */
-void
-initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
+static void
+bfin_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
- rtx t1 = copy_to_reg (fnaddr);
- rtx t2 = copy_to_reg (cxt);
- rtx addr;
+ rtx t1 = copy_to_reg (XEXP (DECL_RTL (fndecl), 0));
+ rtx t2 = copy_to_reg (chain_value);
+ rtx mem;
int i = 0;
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
if (TARGET_FDPIC)
{
- rtx a = memory_address (Pmode, plus_constant (tramp, 8));
- addr = memory_address (Pmode, tramp);
- emit_move_insn (gen_rtx_MEM (SImode, addr), a);
+ rtx a = force_reg (Pmode, plus_constant (XEXP (m_tramp, 0), 8));
+ mem = adjust_address (m_tramp, Pmode, 0);
+ emit_move_insn (mem, a);
i = 8;
}
- addr = memory_address (Pmode, plus_constant (tramp, i + 2));
- emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t1));
+ mem = adjust_address (m_tramp, HImode, i + 2);
+ emit_move_insn (mem, gen_lowpart (HImode, t1));
emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16)));
- addr = memory_address (Pmode, plus_constant (tramp, i + 6));
- emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t1));
+ mem = adjust_address (m_tramp, HImode, i + 6);
+ emit_move_insn (mem, gen_lowpart (HImode, t1));
- addr = memory_address (Pmode, plus_constant (tramp, i + 10));
- emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t2));
+ mem = adjust_address (m_tramp, HImode, i + 10);
+ emit_move_insn (mem, gen_lowpart (HImode, t2));
emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16)));
- addr = memory_address (Pmode, plus_constant (tramp, i + 14));
- emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t2));
+ mem = adjust_address (m_tramp, HImode, i + 14);
+ emit_move_insn (mem, gen_lowpart (HImode, t2));
}
/* Emit insns to move operands[1] into operands[0]. */
bfin_longcall_p (rtx op, int call_cookie)
{
gcc_assert (GET_CODE (op) == SYMBOL_REF);
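+  /* A weak symbol may be resolved to a definition outside this unit, so
+     never assume it is within reach of a short call.  */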
+ if (SYMBOL_REF_WEAK (op))
+ return 1;
if (call_cookie & CALL_SHORT)
return 0;
if (call_cookie & CALL_LONG)
{
rtx use = NULL, call;
rtx callee = XEXP (fnaddr, 0);
- int nelts = 2 + !!sibcall;
+ int nelts = 3;
rtx pat;
rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO);
+ rtx retsreg = gen_rtx_REG (Pmode, REG_RETS);
int n;
/* In an untyped call, we can get NULL for operand 2. */
if (TARGET_FDPIC)
{
- int caller_has_l1_text, callee_has_l1_text;
+ int caller_in_sram, callee_in_sram;
- caller_has_l1_text = callee_has_l1_text = 0;
+ /* 0 is not in sram, 1 is in L1 sram, 2 is in L2 sram. */
+ caller_in_sram = callee_in_sram = 0;
if (lookup_attribute ("l1_text",
DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
- caller_has_l1_text = 1;
+ caller_in_sram = 1;
+ else if (lookup_attribute ("l2",
+ DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
+ caller_in_sram = 2;
if (GET_CODE (callee) == SYMBOL_REF
- && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee))
- && lookup_attribute
- ("l1_text",
- DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
- callee_has_l1_text = 1;
+ && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee)))
+ {
+ if (lookup_attribute
+ ("l1_text",
+ DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
+ callee_in_sram = 1;
+ else if (lookup_attribute
+ ("l2",
+ DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
+ callee_in_sram = 2;
+ }
if (GET_CODE (callee) != SYMBOL_REF
|| bfin_longcall_p (callee, INTVAL (cookie))
|| (GET_CODE (callee) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (callee)
&& TARGET_INLINE_PLT)
- || caller_has_l1_text != callee_has_l1_text
- || (caller_has_l1_text && callee_has_l1_text
+ || caller_in_sram != callee_in_sram
+ || (caller_in_sram && callee_in_sram
&& (GET_CODE (callee) != SYMBOL_REF
|| !SYMBOL_REF_LOCAL_P (callee))))
{
XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie);
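+  /* Sibcalls end in a return; other calls explicitly clobber RETS, so
+     functions that make calls see RETS as ever-live and save it (see
+     must_save_rets_p).  */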
if (sibcall)
XVECEXP (pat, 0, n++) = gen_rtx_RETURN (VOIDmode);
+ else
+ XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg);
call = emit_call_insn (pat);
if (use)
CALL_INSN_FUNCTION_USAGE (call) = use;
hard_regno_mode_ok (int regno, enum machine_mode mode)
{
/* Allow only dregs to store value of mode HI or QI */
- enum reg_class class = REGNO_REG_CLASS (regno);
+ enum reg_class rclass = REGNO_REG_CLASS (regno);
if (mode == CCmode)
return 0;
if (mode == V2HImode)
return D_REGNO_P (regno);
- if (class == CCREGS)
+ if (rclass == CCREGS)
return mode == BImode;
if (mode == PDImode || mode == V2PDImode)
return regno == REG_A0 || regno == REG_A1;
enum reg_class class1, enum reg_class class2)
{
/* These need secondary reloads, so they're more expensive. */
- if ((class1 == CCREGS && class2 != DREGS)
- || (class1 != DREGS && class2 == CCREGS))
+ if ((class1 == CCREGS && !reg_class_subset_p (class2, DREGS))
+ || (class2 == CCREGS && !reg_class_subset_p (class1, DREGS)))
return 4;
/* If optimizing for size, always prefer reg-reg over reg-memory moves. */
if (optimize_size)
return 2;
- /* There are some stalls involved when moving from a DREG to a different
- class reg, and using the value in one of the following instructions.
- Attempt to model this by slightly discouraging such moves. */
- if (class1 == DREGS && class2 != DREGS)
- return 2 * 2;
-
if (GET_MODE_CLASS (mode) == MODE_INT)
{
/* Discourage trying to use the accumulators. */
int
bfin_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
- enum reg_class class,
+ enum reg_class rclass,
int in ATTRIBUTE_UNUSED)
{
/* Make memory accesses slightly more expensive than any register-register
move. Also, penalize non-DP registers, since they need secondary
reloads to load and store. */
- if (! reg_class_subset_p (class, DPREGS))
+ if (! reg_class_subset_p (rclass, DPREGS))
return 10;
return 8;
}
/* Inform reload about cases where moving X with a mode MODE to a register in
- CLASS requires an extra scratch register. Return the class needed for the
+ RCLASS requires an extra scratch register. Return the class needed for the
scratch register. */
static enum reg_class
-bfin_secondary_reload (bool in_p, rtx x, enum reg_class class,
+bfin_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
enum machine_mode mode, secondary_reload_info *sri)
{
/* If we have HImode or QImode, we can only use DREGS as secondary registers;
if (fp_plus_const_operand (x, mode))
{
rtx op2 = XEXP (x, 1);
- int large_constant_p = ! CONST_7BIT_IMM_P (INTVAL (op2));
+ int large_constant_p = ! satisfies_constraint_Ks7 (op2);
- if (class == PREGS || class == PREGS_CLOBBERED)
+ if (rclass == PREGS || rclass == PREGS_CLOBBERED)
return NO_REGS;
/* If destination is a DREG, we can do this without a scratch register
if the constant is valid for an add instruction. */
- if ((class == DREGS || class == DPREGS)
+ if ((rclass == DREGS || rclass == DPREGS)
&& ! large_constant_p)
return NO_REGS;
/* Reloading to anything other than a DREG? Use a PREG scratch
AREGS are an exception; they can only move to or from another register
in AREGS or one in DREGS. They can also be assigned the constant 0. */
if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS)
- return (class == DREGS || class == AREGS || class == EVEN_AREGS
- || class == ODD_AREGS
+ return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS
+ || rclass == ODD_AREGS
? NO_REGS : DREGS);
- if (class == AREGS || class == EVEN_AREGS || class == ODD_AREGS)
+ if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS)
{
if (code == MEM)
{
}
/* CCREGS can only be moved from/to DREGS. */
- if (class == CCREGS && x_class != DREGS)
+ if (rclass == CCREGS && x_class != DREGS)
return DREGS;
- if (x_class == CCREGS && class != DREGS)
+ if (x_class == CCREGS && rclass != DREGS)
return DREGS;
/* All registers other than AREGS can load arbitrary constants. The only
case that remains is MEM. */
if (code == MEM)
- if (! reg_class_subset_p (class, default_class))
+ if (! reg_class_subset_p (rclass, default_class))
return default_class;
return NO_REGS;
q = arg + strlen (p);
- cputype_selected = true;
-
if (*q == '\0')
{
bfin_si_revision = bfin_cpus[i].si_revision;
bfin_workarounds |= bfin_cpus[i].workarounds;
}
- if (bfin_cpu_type == BFIN_CPU_BF561)
- warning (0, "bf561 support is incomplete yet.");
-
return true;
}
static struct machine_function *
bfin_init_machine_status (void)
{
- struct machine_function *f;
-
- f = ggc_alloc_cleared (sizeof (struct machine_function));
-
- return f;
+ return ggc_alloc_cleared_machine_function ();
}
/* Implement the macro OVERRIDE_OPTIONS. */
void
override_options (void)
{
+ /* If processor type is not specified, enable all workarounds. */
+ if (bfin_cpu_type == BFIN_CPU_UNKNOWN)
+ {
+ int i;
+
+ for (i = 0; bfin_cpus[i].name != NULL; i++)
+ bfin_workarounds |= bfin_cpus[i].workarounds;
+
+ bfin_si_revision = 0xffff;
+ }
+
if (bfin_csync_anomaly == 1)
bfin_workarounds |= WA_SPECULATIVE_SYNCS;
else if (bfin_csync_anomaly == 0)
else if (bfin_specld_anomaly == 0)
bfin_workarounds &= ~WA_SPECULATIVE_LOADS;
- if (!cputype_selected)
- bfin_workarounds |= WA_RETS;
-
if (TARGET_OMIT_LEAF_FRAME_POINTER)
flag_omit_frame_pointer = 1;
if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY)
flag_pic = 0;
+ if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561)
+ error ("-mmulticore can only be used with BF561");
+
+ if (TARGET_COREA && !TARGET_MULTICORE)
+ error ("-mcorea should be used with -mmulticore");
+
+ if (TARGET_COREB && !TARGET_MULTICORE)
+ error ("-mcoreb should be used with -mmulticore");
+
+ if (TARGET_COREA && TARGET_COREB)
+ error ("-mcorea and -mcoreb can't be used together");
+
flag_schedule_insns = 0;
/* Passes after sched2 can break the helpful TImode annotations that
bfin_gen_compare (rtx cmp, enum machine_mode mode ATTRIBUTE_UNUSED)
{
enum rtx_code code1, code2;
- rtx op0 = bfin_compare_op0, op1 = bfin_compare_op1;
+ rtx op0 = XEXP (cmp, 0), op1 = XEXP (cmp, 1);
rtx tem = bfin_cc_rtx;
enum rtx_code code = GET_CODE (cmp);
code2 = EQ;
break;
}
- emit_insn (gen_rtx_SET (BImode, tem,
+ emit_insn (gen_rtx_SET (VOIDmode, tem,
gen_rtx_fmt_ee (code1, BImode, op0, op1)));
}
if (D_REGNO_P (regno))
{
- if (CONST_7BIT_IMM_P (tmp))
+ if (tmp >= -64 && tmp <= 63)
{
emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
emit_insn (gen_movstricthi_high (operands[0], GEN_INT (val & -65536)));
return 0;
if (optimize_size
- && num_compl_zero && CONST_7BIT_IMM_P (shifted_compl))
+ && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63)
{
/* If optimizing for size, generate a sequence that has more instructions
but is shorter. */
return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH);
}
-bool
-bfin_legitimate_address_p (enum machine_mode mode, rtx x, int strict)
+/* Recognize an RTL expression that is a valid memory address for an
+ instruction. The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+ Blackfin addressing modes are as follows:
+
+ [preg]
+ [preg + imm16]
+
+ B [ Preg + uimm15 ]    (byte access)
+ W [ Preg + uimm16m2 ]  (16-bit access)
+   [ Preg + uimm17m4 ]  (32-bit access)
+
+ [preg++]
+ [preg--]
+ [--sp]
+*/
+
+static bool
+bfin_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
{
switch (GET_CODE (x)) {
case REG:
}
static bool
-bfin_rtx_costs (rtx x, int code, int outer_code, int *total)
+bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
{
int cost2 = COSTS_N_INSNS (1);
rtx op0, op1;
{
case CONST_INT:
if (outer_code == SET || outer_code == PLUS)
- *total = CONST_7BIT_IMM_P (INTVAL (x)) ? 0 : cost2;
+ *total = satisfies_constraint_Ks7 (x) ? 0 : cost2;
else if (outer_code == AND)
*total = log2constp (~INTVAL (x)) ? 0 : cost2;
else if (outer_code == LE || outer_code == LT || outer_code == EQ)
if (val == 2 || val == 4)
{
*total = cost2;
- *total += rtx_cost (XEXP (op0, 0), outer_code);
- *total += rtx_cost (op1, outer_code);
+ *total += rtx_cost (XEXP (op0, 0), outer_code, speed);
+ *total += rtx_cost (op1, outer_code, speed);
return true;
}
}
*total = cost2;
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, SET);
+ *total += rtx_cost (op0, SET, speed);
#if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer
towards creating too many induction variables. */
if (!reg_or_7bit_operand (op1, SImode))
- *total += rtx_cost (op1, SET);
+ *total += rtx_cost (op1, SET, speed);
#endif
}
else if (GET_MODE (x) == DImode)
{
*total = 6 * cost2;
if (GET_CODE (op1) != CONST_INT
- || !CONST_7BIT_IMM_P (INTVAL (op1)))
- *total += rtx_cost (op1, PLUS);
+ || !satisfies_constraint_Ks7 (op1))
+ *total += rtx_cost (op1, PLUS, speed);
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, PLUS);
+ *total += rtx_cost (op0, PLUS, speed);
}
return true;
op1 = XEXP (x, 1);
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, code);
+ *total += rtx_cost (op0, code, speed);
return true;
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, code);
+ *total += rtx_cost (op0, code, speed);
if (GET_MODE (x) == DImode)
{
if (code == AND)
{
if (! rhs_andsi3_operand (XEXP (x, 1), SImode))
- *total += rtx_cost (XEXP (x, 1), code);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
}
else
{
if (! regorlog2_operand (XEXP (x, 1), SImode))
- *total += rtx_cost (XEXP (x, 1), code);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
}
return true;
op0 = XEXP (op0, 0);
op1 = XEXP (op1, 0);
}
- else if (optimize_size)
+ else if (!speed)
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (3);
if (GET_CODE (op0) != REG
&& (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
- *total += rtx_cost (op0, MULT);
+ *total += rtx_cost (op0, MULT, speed);
if (GET_CODE (op1) != REG
&& (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
- *total += rtx_cost (op1, MULT);
+ *total += rtx_cost (op1, MULT, speed);
}
return true;
/* Used for communication between {push,pop}_multiple_operation (which
we use not only as a predicate) and the corresponding output functions. */
static int first_preg_to_save, first_dreg_to_save;
+static int n_regs_to_save;
int
push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
lastpreg++;
}
}
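+  /* A push or pop multiple can name at most 8 D registers (R0-R7) and
+     6 P registers (P0-P5).  */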
+ n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
return 1;
}
}
first_dreg_to_save = lastdreg;
first_preg_to_save = lastpreg;
+ n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
return 1;
}
countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
emit_insn (gen_rep_movsi (destreg, srcreg, countreg, destreg, srcreg));
+ cfun->machine->has_loopreg_clobber = true;
}
if (count & 2)
{
countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
emit_insn (gen_rep_movhi (destreg, srcreg, countreg, destreg, srcreg));
+ cfun->machine->has_loopreg_clobber = true;
}
}
if (count & 1)
return cost;
}
+\f
+/* This function acts like NEXT_INSN, but is aware of three-insn bundles and
+   skips all subsequent parallel instructions if INSN is the start of such
+   a group.  (Bundle members are marked SImode, except for the final member,
+   which is marked QImode.)  */
+static rtx
+find_next_insn_start (rtx insn)
+{
+ if (GET_MODE (insn) == SImode)
+ {
+ while (GET_MODE (insn) != QImode)
+ insn = NEXT_INSN (insn);
+ }
+ return NEXT_INSN (insn);
+}
+
+/* This function acts like PREV_INSN, but is aware of three-insn bundles:
+   if stepping back lands on the final insn of such a group, it continues
+   back to the group's first instruction.  */
+static rtx
+find_prev_insn_start (rtx insn)
+{
+ insn = PREV_INSN (insn);
+ gcc_assert (GET_MODE (insn) != SImode);
+ if (GET_MODE (insn) == QImode)
+ {
+ while (GET_MODE (PREV_INSN (insn)) == SImode)
+ insn = PREV_INSN (insn);
+ }
+ return insn;
+}
\f
/* Increment the counter for the number of loop instructions in the
current function. */
/* Information about a loop we have found (or are in the process of
finding). */
-struct loop_info GTY (())
+struct GTY (()) loop_info
{
/* loop number, for dumps */
int loop_no;
/* The iteration register. */
rtx iter_reg;
- /* The new initialization insn. */
- rtx init;
-
- /* The new initialization instruction. */
- rtx loop_init;
-
/* The new label placed at the beginning of the loop. */
rtx start_label;
length = 4;
}
- if (INSN_P (insn))
+ if (NONDEBUG_INSN_P (insn))
length += get_attr_length (insn);
return length;
{
basic_block bb;
loop_info inner;
- rtx insn, init_insn, last_insn, nop_insn;
+ rtx insn, last_insn;
rtx loop_init, start_label, end_label;
rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1;
- rtx iter_reg;
+ rtx iter_reg, scratchreg, scratch_init, scratch_init_insn;
rtx lc_reg, lt_reg, lb_reg;
rtx seq, seq_end;
int length;
/* Get the loop iteration register. */
iter_reg = loop->iter_reg;
- if (!DPREG_P (iter_reg))
+ if (!REG_P (iter_reg))
{
if (dump_file)
- fprintf (dump_file, ";; loop %d iteration count NOT in PREG or DREG\n",
+ fprintf (dump_file, ";; loop %d iteration count not in a register\n",
loop->loop_no);
goto bad_loop;
}
+ scratchreg = NULL_RTX;
+ scratch_init = iter_reg;
+ scratch_init_insn = NULL_RTX;
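+  /* If the iteration count is not already in a P register, look for a
+     P register that is dead at the end of the incoming block to use as a
+     scratch for the LSETUP, and for a constant initialization of the
+     iteration register whose value could be loaded into the scratch
+     directly.  */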
+ if (!PREG_P (iter_reg) && loop->incoming_src)
+ {
+ basic_block bb_in = loop->incoming_src;
+ int i;
+ for (i = REG_P0; i <= REG_P5; i++)
+ if ((df_regs_ever_live_p (i)
+ || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE
+ && call_used_regs[i]))
+ && !REGNO_REG_SET_P (df_get_live_out (bb_in), i))
+ {
+ scratchreg = gen_rtx_REG (SImode, i);
+ break;
+ }
+ for (insn = BB_END (bb_in); insn != BB_HEAD (bb_in);
+ insn = PREV_INSN (insn))
+ {
+ rtx set;
+ if (NOTE_P (insn) || BARRIER_P (insn))
+ continue;
+ set = single_set (insn);
+ if (set && rtx_equal_p (SET_DEST (set), iter_reg))
+ {
+ if (CONSTANT_P (SET_SRC (set)))
+ {
+ scratch_init = SET_SRC (set);
+ scratch_init_insn = insn;
+ }
+ break;
+ }
+ else if (reg_mentioned_p (iter_reg, PATTERN (insn)))
+ break;
+ }
+ }
if (loop->incoming_src)
{
/* Make sure the predecessor is before the loop start label, as required by
the LSETUP instruction. */
length = 0;
- for (insn = BB_END (loop->incoming_src);
- insn && insn != loop->start_label;
- insn = NEXT_INSN (insn))
+ insn = BB_END (loop->incoming_src);
+ /* If we have to insert the LSETUP before a jump, count that jump in the
+ length. */
+ if (VEC_length (edge, loop->incoming) > 1
+ || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
+ {
+ gcc_assert (JUMP_P (insn));
+ insn = PREV_INSN (insn);
+ }
+
+ for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn))
length += length_for_loop (insn);
-
+
if (!insn)
{
if (dump_file)
goto bad_loop;
}
+ /* Account for the pop of a scratch register where necessary. */
+ if (!PREG_P (iter_reg) && scratchreg == NULL_RTX
+ && ENABLE_WA_LOAD_LCREGS)
+ length += 2;
+
if (length > MAX_LSETUP_DISTANCE)
{
if (dump_file)
- Returns (RTS, RTN, etc.) */
bb = loop->tail;
- last_insn = PREV_INSN (loop->loop_end);
+ last_insn = find_prev_insn_start (loop->loop_end);
while (1)
{
- for (; last_insn != PREV_INSN (BB_HEAD (bb));
- last_insn = PREV_INSN (last_insn))
- if (INSN_P (last_insn))
+ for (; last_insn != BB_HEAD (bb);
+ last_insn = find_prev_insn_start (last_insn))
+ if (NONDEBUG_INSN_P (last_insn))
break;
- if (last_insn != PREV_INSN (BB_HEAD (bb)))
+ if (last_insn != BB_HEAD (bb))
break;
if (single_pred_p (bb)
+ && single_pred_edge (bb)->flags & EDGE_FALLTHRU
&& single_pred (bb) != ENTRY_BLOCK_PTR)
{
bb = single_pred (bb);
goto bad_loop;
}
- if (JUMP_P (last_insn))
+ if (JUMP_P (last_insn) && !any_condjump_p (last_insn))
{
- loop_info inner = bb->aux;
- if (inner
- && inner->outer == loop
- && inner->loop_end == last_insn
- && inner->depth == 1)
- /* This jump_insn is the exact loop_end of an inner loop
- and to be optimized away. So use the inner's last_insn. */
- last_insn = inner->last_insn;
- else
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has bad last instruction\n",
+ loop->loop_no);
+ goto bad_loop;
+ }
+ /* In all other cases, try to replace a bad last insn with a nop. */
+ else if (JUMP_P (last_insn)
+ || CALL_P (last_insn)
+ || get_attr_type (last_insn) == TYPE_SYNC
+ || get_attr_type (last_insn) == TYPE_CALL
+ || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI
+ || recog_memoized (last_insn) == CODE_FOR_return_internal
+ || GET_CODE (PATTERN (last_insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (last_insn)) >= 0)
+ {
+ if (loop->length + 2 > MAX_LOOP_LENGTH)
{
if (dump_file)
- fprintf (dump_file, ";; loop %d has bad last instruction\n",
- loop->loop_no);
+ fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
goto bad_loop;
}
- }
- else if (CALL_P (last_insn)
- || (GET_CODE (PATTERN (last_insn)) != SEQUENCE
- && get_attr_type (last_insn) == TYPE_SYNC)
- || recog_memoized (last_insn) == CODE_FOR_return_internal)
- {
if (dump_file)
- fprintf (dump_file, ";; loop %d has bad last instruction\n",
+ fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n",
loop->loop_no);
- goto bad_loop;
- }
- if (GET_CODE (PATTERN (last_insn)) == ASM_INPUT
- || asm_noperands (PATTERN (last_insn)) >= 0
- || (GET_CODE (PATTERN (last_insn)) != SEQUENCE
- && get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI))
- {
- nop_insn = emit_insn_after (gen_nop (), last_insn);
- last_insn = nop_insn;
+ last_insn = emit_insn_after (gen_forced_nop (), last_insn);
}
loop->last_insn = last_insn;
loop->clobber_loop0 = 1;
}
- /* If iter_reg is a DREG, we need generate an instruction to load
- the loop count into LC register. */
- if (D_REGNO_P (REGNO (iter_reg)))
+ loop->end_label = end_label;
+
+ /* Create a sequence containing the loop setup. */
+ start_sequence ();
+
+ /* LSETUP only accepts P registers. If we have one, we can use it,
+ otherwise there are several ways of working around the problem.
+ If we're not affected by anomaly 312, we can load the LC register
+ from any iteration register, and use LSETUP without initialization.
+ If we've found a P scratch register that's not live here, we can
+ instead copy the iter_reg into that and use an initializing LSETUP.
+ If all else fails, push and pop P0 and use it as a scratch. */
+ if (P_REGNO_P (REGNO (iter_reg)))
+ {
+ loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+ lb_reg, end_label,
+ lc_reg, iter_reg);
+ seq_end = emit_insn (loop_init);
+ }
+ else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg))
{
- init_insn = gen_movsi (lc_reg, iter_reg);
+ emit_insn (gen_movsi (lc_reg, iter_reg));
loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
lb_reg, end_label,
lc_reg);
+ seq_end = emit_insn (loop_init);
}
- else if (P_REGNO_P (REGNO (iter_reg)))
+ else if (scratchreg != NULL_RTX)
{
- init_insn = NULL_RTX;
+ emit_insn (gen_movsi (scratchreg, scratch_init));
loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
lb_reg, end_label,
- lc_reg, iter_reg);
+ lc_reg, scratchreg);
+ seq_end = emit_insn (loop_init);
+ if (scratch_init_insn != NULL_RTX)
+ delete_insn (scratch_init_insn);
}
else
- gcc_unreachable ();
-
- loop->init = init_insn;
- loop->end_label = end_label;
- loop->loop_init = loop_init;
+ {
+ rtx p0reg = gen_rtx_REG (SImode, REG_P0);
+ rtx push = gen_frame_mem (SImode,
+ gen_rtx_PRE_DEC (SImode, stack_pointer_rtx));
+ rtx pop = gen_frame_mem (SImode,
+ gen_rtx_POST_INC (SImode, stack_pointer_rtx));
+ emit_insn (gen_movsi (push, p0reg));
+ emit_insn (gen_movsi (p0reg, scratch_init));
+ loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+ lb_reg, end_label,
+ lc_reg, p0reg);
+ emit_insn (loop_init);
+ seq_end = emit_insn (gen_movsi (p0reg, pop));
+ if (scratch_init_insn != NULL_RTX)
+ delete_insn (scratch_init_insn);
+ }
if (dump_file)
{
fprintf (dump_file, ";; replacing loop %d initializer with\n",
loop->loop_no);
- print_rtl_single (dump_file, loop->loop_init);
+ print_rtl_single (dump_file, loop_init);
fprintf (dump_file, ";; replacing loop %d terminator with\n",
loop->loop_no);
print_rtl_single (dump_file, loop->loop_end);
}
- start_sequence ();
-
- if (loop->init != NULL_RTX)
- emit_insn (loop->init);
- seq_end = emit_insn (loop->loop_init);
+ /* If the loop isn't entered at the top, also create a jump to the entry
+ point. */
+ if (!loop->incoming_src && loop->head != loop->incoming_dest)
+ {
+ rtx label = BB_HEAD (loop->incoming_dest);
+ /* If we're jumping to the final basic block in the loop, and there's
+ only one cheap instruction before the end (typically an increment of
+ an induction variable), we can just emit a copy here instead of a
+ jump. */
+ if (loop->incoming_dest == loop->tail
+ && next_real_insn (label) == last_insn
+ && asm_noperands (last_insn) < 0
+ && GET_CODE (PATTERN (last_insn)) == SET)
+ {
+ seq_end = emit_insn (copy_rtx (PATTERN (last_insn)));
+ }
+ else
+ {
+ emit_jump_insn (gen_jump (label));
+ seq_end = emit_barrier ();
+ }
+ }
seq = get_insns ();
end_sequence ();
basic_block new_bb;
edge e;
edge_iterator ei;
-
+
+#ifdef ENABLE_CHECKING
if (loop->head != loop->incoming_dest)
{
+ /* We aren't entering the loop at the top. Since we've established
+ that the loop is entered only at one point, this means there
+ can't be fallthru edges into the head. Any such fallthru edges
+ would become invalid when we insert the new block, so verify
+ that this does not in fact happen. */
FOR_EACH_EDGE (e, ei, loop->head->preds)
- {
- if (e->flags & EDGE_FALLTHRU)
- {
- rtx newjump = gen_jump (loop->start_label);
- emit_insn_before (newjump, BB_HEAD (loop->head));
- new_bb = create_basic_block (newjump, newjump, loop->head->prev_bb);
- gcc_assert (new_bb = loop->head->prev_bb);
- break;
- }
- }
+ gcc_assert (!(e->flags & EDGE_FALLTHRU));
}
+#endif
emit_insn_before (seq, BB_HEAD (loop->head));
seq = emit_label_before (gen_label_rtx (), seq);
else
redirect_edge_succ (e, new_bb);
}
+ e = make_edge (new_bb, loop->head, 0);
}
-
+
delete_insn (loop->loop_end);
/* Insert the loop end label before the last instruction of the loop. */
emit_label_before (loop->end_label, loop->last_insn);
{
/* If loop->iter_reg is a DREG or PREG, we can split it here
without scratch register. */
- rtx insn;
+ rtx insn, test;
emit_insn_before (gen_addsi3 (loop->iter_reg,
loop->iter_reg,
constm1_rtx),
loop->loop_end);
- emit_insn_before (gen_cmpsi (loop->iter_reg, const0_rtx),
- loop->loop_end);
-
- insn = emit_jump_insn_before (gen_bne (loop->start_label),
+ test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
+ insn = emit_jump_insn_before (gen_cbranchsi4 (test,
+ loop->iter_reg, const0_rtx,
+ loop->start_label),
loop->loop_end);
JUMP_LABEL (insn) = loop->start_label;
loop->outer = NULL;
loop->loops = NULL;
loop->incoming = VEC_alloc (edge, gc, 2);
- loop->init = loop->loop_init = NULL_RTX;
loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0);
loop->end_label = NULL_RTX;
loop->bad = 0;
break;
}
}
+ if (!retry)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; No forwarder blocks found\n");
+ loop->bad = 1;
+ }
}
}
}
if (INSN_P (tail) && recog_memoized (tail) == CODE_FOR_loop_end)
{
+ rtx insn;
/* A possible loop end */
+ /* There's a degenerate case we can handle - an empty loop consisting
+ of only a back branch. Handle that by deleting the branch. */
+ insn = BB_HEAD (BRANCH_EDGE (bb)->dest);
+ if (next_real_insn (insn) == tail)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; degenerate loop ending at\n");
+ print_rtl_single (dump_file, tail);
+ }
+ delete_insn_and_edges (tail);
+ continue;
+ }
+
loop = XNEW (struct loop_info);
loop->next = loops;
loops = loop;
fprintf (dump_file, ";; All loops found:\n\n");
bfin_dump_loops (loops);
}
-
+
/* Now apply the optimizations. */
for (loop = loops; loop; loop = loop->next)
bfin_optimize_loop (loop);
FOR_EACH_BB (bb)
bb->aux = NULL;
+
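+  /* Any loop_end branches that were not converted into hardware loops are
+     now split into ordinary decrement-and-branch code; the splitters in
+     the machine description are assumed to test splitting_loops.  */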
+ splitting_loops = 1;
+ FOR_EACH_BB (bb)
+ {
+ rtx insn = BB_END (bb);
+ if (!JUMP_P (insn))
+ continue;
+
+ try_split (PATTERN (insn), insn, 1);
+ }
+ splitting_loops = 0;
}
\f
/* Possibly generate a SEQUENCE out of three insns found in SLOT.
{
gcc_assert (slot[1] != NULL_RTX);
+ /* Don't add extra NOPs if optimizing for size. */
+ if (optimize_size
+ && (slot[0] == NULL_RTX || slot[2] == NULL_RTX))
+ return false;
+
/* Verify that we really can do the multi-issue. */
if (slot[0])
{
for (insn = BB_HEAD (bb);; insn = next)
{
int at_end;
- if (INSN_P (insn))
+ rtx delete_this = NULL_RTX;
+
+ if (NONDEBUG_INSN_P (insn))
{
- if (get_attr_type (insn) == TYPE_DSP32)
- slot[0] = insn;
- else if (slot[1] == NULL_RTX)
- slot[1] = insn;
+ enum attr_type type = get_attr_type (insn);
+
+ if (type == TYPE_STALL)
+ {
+ gcc_assert (n_filled == 0);
+ delete_this = insn;
+ }
else
- slot[2] = insn;
- n_filled++;
+ {
+ if (type == TYPE_DSP32 || type == TYPE_DSP32SHIFTIMM)
+ slot[0] = insn;
+ else if (slot[1] == NULL_RTX)
+ slot[1] = insn;
+ else
+ slot[2] = insn;
+ n_filled++;
+ }
}
next = NEXT_INSN (insn);
/* BB_END can change due to emitting extra NOPs, so check here. */
at_end = insn == BB_END (bb);
- if (at_end || GET_MODE (next) == TImode)
+ if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode))
{
if ((n_filled < 2
|| !gen_one_bundle (slot))
n_filled = 0;
slot[0] = slot[1] = slot[2] = NULL_RTX;
}
+ if (delete_this != NULL_RTX)
+ delete_insn (delete_this);
if (at_end)
break;
}
}
}
\f
-/* Return an insn type for INSN that can be used by the caller for anomaly
- workarounds. This differs from plain get_attr_type in that it handles
- SEQUENCEs. */
-
-static enum attr_type
-type_for_anomaly (rtx insn)
+/* On some silicon revisions, functions shorter than a certain number of cycles
+ can cause unpredictable behaviour. Work around this by adding NOPs as
+ needed. */
+static void
+workaround_rts_anomaly (void)
{
- rtx pat = PATTERN (insn);
- if (GET_CODE (pat) == SEQUENCE)
- {
- enum attr_type t;
- t = get_attr_type (XVECEXP (pat, 0, 1));
- if (t == TYPE_MCLD)
- return t;
- t = get_attr_type (XVECEXP (pat, 0, 2));
- if (t == TYPE_MCLD)
- return t;
- return TYPE_MCST;
- }
- else
- return get_attr_type (insn);
-}
+ rtx insn, first_insn = NULL_RTX;
+ int cycles = 4;
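+  /* The workaround requires at least four cycles' worth of instructions
+     before any RTS; count down as we scan.  */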
-/* Return nonzero if INSN contains any loads that may trap. It handles
- SEQUENCEs correctly. */
+ if (! ENABLE_WA_RETS)
+ return;
-static bool
-trapping_loads_p (rtx insn)
-{
- rtx pat = PATTERN (insn);
- if (GET_CODE (pat) == SEQUENCE)
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx pat;
+
+ if (BARRIER_P (insn))
+ return;
+
+ if (NOTE_P (insn) || LABEL_P (insn))
+ continue;
+
+ if (first_insn == NULL_RTX)
+ first_insn = insn;
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+ || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ continue;
+
+ if (CALL_P (insn))
+ return;
+
+ if (JUMP_P (insn))
+ {
+ if (recog_memoized (insn) == CODE_FOR_return_internal)
+ break;
+
+ /* Nothing to worry about for direct jumps. */
+ if (!any_condjump_p (insn))
+ return;
+ if (cycles <= 1)
+ return;
+ cycles--;
+ }
+ else if (INSN_P (insn))
+ {
+ rtx pat = PATTERN (insn);
+ int this_cycles = 1;
+
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ if (push_multiple_operation (pat, VOIDmode)
+ || pop_multiple_operation (pat, VOIDmode))
+ this_cycles = n_regs_to_save;
+ }
+ else
+ {
+ enum insn_code icode = recog_memoized (insn);
+ if (icode == CODE_FOR_link)
+ this_cycles = 4;
+ else if (icode == CODE_FOR_unlink)
+ this_cycles = 3;
+ else if (icode == CODE_FOR_mulsi3)
+ this_cycles = 5;
+ }
+ if (this_cycles >= cycles)
+ return;
+
+ cycles -= this_cycles;
+ }
+ }
+ while (cycles > 0)
+ {
+ emit_insn_before (gen_nop (), first_insn);
+ cycles--;
+ }
+}
+
+/* Return an insn type for INSN that can be used by the caller for anomaly
+ workarounds. This differs from plain get_attr_type in that it handles
+ SEQUENCEs. */
+
+static enum attr_type
+type_for_anomaly (rtx insn)
+{
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == SEQUENCE)
{
enum attr_type t;
t = get_attr_type (XVECEXP (pat, 0, 1));
- if (t == TYPE_MCLD
- && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 1)))))
- return true;
+ if (t == TYPE_MCLD)
+ return t;
t = get_attr_type (XVECEXP (pat, 0, 2));
- if (t == TYPE_MCLD
- && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 2)))))
- return true;
- return false;
+ if (t == TYPE_MCLD)
+ return t;
+ return TYPE_MCST;
}
else
- return may_trap_p (SET_SRC (single_set (insn)));
+ return get_attr_type (insn);
}
-/* This function acts like NEXT_INSN, but is aware of three-insn bundles and
- skips all subsequent parallel instructions if INSN is the start of such
- a group. */
-static rtx
-find_next_insn_start (rtx insn)
+/* Return true iff the address found in MEM is based on the register
+ NP_REG and optionally has a positive offset. */
+static bool
+harmless_null_pointer_p (rtx mem, int np_reg)
{
- if (GET_MODE (insn) == SImode)
+ mem = XEXP (mem, 0);
+ if (GET_CODE (mem) == POST_INC || GET_CODE (mem) == POST_DEC)
+ mem = XEXP (mem, 0);
+ if (REG_P (mem) && REGNO (mem) == np_reg)
+ return true;
+ if (GET_CODE (mem) == PLUS
+ && REG_P (XEXP (mem, 0)) && REGNO (XEXP (mem, 0)) == np_reg)
{
- while (GET_MODE (insn) != QImode)
- insn = NEXT_INSN (insn);
+ mem = XEXP (mem, 1);
+ if (GET_CODE (mem) == CONST_INT && INTVAL (mem) > 0)
+ return true;
}
- return NEXT_INSN (insn);
+ return false;
+}
+
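+/* A sketch of the addresses accepted above, with NP_REG the register
+ number of P2:
+
+ [p2] [p2++] [p2--] [p2 + 8]
+
+ are considered harmless, while [p2 - 8] or [p2 + p0] are not: only a
+ plain base or a positive constant offset keeps a speculative access
+ through a NULL pointer at or near address 0. */
+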
+/* Return nonzero if INSN contains any loads that may trap. If NP_REG is
+ nonnegative and AFTER_NP_BRANCH is true, ignore loads for which
+ harmless_null_pointer_p says a speculative NULL-pointer access through
+ NP_REG would be harmless. */
+
+static bool
+trapping_loads_p (rtx insn, int np_reg, bool after_np_branch)
+{
+ rtx pat = PATTERN (insn);
+ rtx mem = SET_SRC (single_set (insn));
+
+ if (!after_np_branch)
+ np_reg = -1;
+ return ((np_reg == -1 || !harmless_null_pointer_p (mem, np_reg))
+ && may_trap_p (mem));
}
/* Return INSN if it is of TYPE_MCLD. Alternatively, if INSN is the start of
static rtx
find_load (rtx insn)
{
+ if (!NONDEBUG_INSN_P (insn))
+ return NULL_RTX;
if (get_attr_type (insn) == TYPE_MCLD)
return insn;
if (GET_MODE (insn) != SImode)
return NULL_RTX;
}
-/* We use the machine specific reorg pass for emitting CSYNC instructions
- after conditional branches as needed.
-
- The Blackfin is unusual in that a code sequence like
- if cc jump label
- r0 = (p0)
- may speculatively perform the load even if the condition isn't true. This
- happens for a branch that is predicted not taken, because the pipeline
- isn't flushed or stalled, so the early stages of the following instructions,
- which perform the memory reference, are allowed to execute before the
- jump condition is evaluated.
- Therefore, we must insert additional instructions in all places where this
- could lead to incorrect behavior. The manual recommends CSYNC, while
- VDSP seems to use NOPs (even though its corresponding compiler option is
- named CSYNC).
+/* Determine whether PAT is an indirect call pattern. */
+static bool
+indirect_call_p (rtx pat)
+{
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) == SET)
+ pat = SET_SRC (pat);
+ gcc_assert (GET_CODE (pat) == CALL);
+ pat = XEXP (pat, 0);
+ gcc_assert (GET_CODE (pat) == MEM);
+ pat = XEXP (pat, 0);
+
+ return REG_P (pat);
+}
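+
+/* A sketch of the two call shapes distinguished above:
+
+ (call (mem (symbol_ref "foo")) ...) direct, returns false
+ (call (mem (reg P2)) ...) indirect, returns true
+
+ A call with a return value is wrapped in a SET and some call patterns
+ in a PARALLEL, which is why both are stripped first. */
+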
- When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
- When optimizing for size, we turn the branch into a predicted taken one.
- This may be slower due to mispredicts, but saves code size. */
+/* During workaround_speculation, track whether we're in the shadow of a
+ conditional branch that tests a P register for NULL. If so, we can omit
+ emitting NOPs if we see a load from that P register, since a speculative
+ access at address 0 isn't a problem, and the load is executed in all other
+ cases anyway.
+ Global for communication with note_np_check_stores through note_stores. */
+static int np_check_regno = -1;
+static bool np_after_branch = false;
+
+/* Subroutine of workaround_speculation, called through note_stores. */
+static void
+note_np_check_stores (rtx x, const_rtx pat, void *data ATTRIBUTE_UNUSED)
+{
+ if (REG_P (x) && (REGNO (x) == REG_CC || REGNO (x) == np_check_regno))
+ np_check_regno = -1;
+}
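+
+/* As an illustration of the scheme described above (a sketch):
+
+ cc = p2 == 0;
+ if cc jump .Lskip;
+ r0 = [p2];
+
+ The compare records P2 in np_check_regno; the branch (the NE form of
+ cbranchbi4) sets np_after_branch, and the load then needs no NOP
+ padding: if it executes speculatively while the branch is mispredicted,
+ it reads address 0, which is harmless. */
+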
static void
-bfin_reorg (void)
+workaround_speculation (void)
{
rtx insn, next;
rtx last_condjump = NULL_RTX;
int cycles_since_jump = INT_MAX;
+ int delay_added = 0;
- /* We are freeing block_for_insn in the toplev to keep compatibility
- with old MDEP_REORGS that are not CFG based. Recompute it now. */
- compute_bb_for_insn ();
-
- if (bfin_flag_schedule_insns2)
- {
- splitting_for_sched = 1;
- split_all_insns ();
- splitting_for_sched = 0;
-
- timevar_push (TV_SCHED2);
- schedule_insns ();
- timevar_pop (TV_SCHED2);
-
- /* Examine the schedule and insert nops as necessary for 64-bit parallel
- instructions. */
- bfin_gen_bundles ();
- }
-
- df_analyze ();
-
- /* Doloop optimization */
- if (cfun->machine->has_hardware_loops)
- bfin_reorg_loops (dump_file);
-
- if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS)
+ if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
+ && ! ENABLE_WA_INDIRECT_CALLS)
return;
/* First pass: find predicted-false branches; if something after them
for (insn = get_insns (); insn; insn = next)
{
rtx pat;
+ int delay_needed = 0;
next = find_next_insn_start (insn);
- if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
+ if (NOTE_P (insn) || BARRIER_P (insn))
continue;
+ if (LABEL_P (insn))
+ {
+ np_check_regno = -1;
+ continue;
+ }
+
pat = PATTERN (insn);
if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
- || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
- || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ || GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
continue;
+
+ if (GET_CODE (pat) == ASM_INPUT || asm_noperands (pat) >= 0)
+ {
+ np_check_regno = -1;
+ continue;
+ }
if (JUMP_P (insn))
{
+ /* Is this a condjump based on a null pointer comparison we saw
+ earlier? */
+ if (np_check_regno != -1
+ && recog_memoized (insn) == CODE_FOR_cbranchbi4)
+ {
+ rtx op = XEXP (SET_SRC (PATTERN (insn)), 0);
+ gcc_assert (GET_CODE (op) == EQ || GET_CODE (op) == NE);
+ if (GET_CODE (op) == NE)
+ np_after_branch = true;
+ }
if (any_condjump_p (insn)
&& ! cbranch_predicted_taken_p (insn))
{
last_condjump = insn;
+ delay_added = 0;
cycles_since_jump = 0;
}
else
cycles_since_jump = INT_MAX;
}
- else if (INSN_P (insn))
+ else if (CALL_P (insn))
+ {
+ np_check_regno = -1;
+ if (cycles_since_jump < INT_MAX)
+ cycles_since_jump++;
+ if (indirect_call_p (pat) && ENABLE_WA_INDIRECT_CALLS)
+ {
+ delay_needed = 3;
+ }
+ }
+ else if (NONDEBUG_INSN_P (insn))
{
rtx load_insn = find_load (insn);
enum attr_type type = type_for_anomaly (insn);
- int delay_needed = 0;
+
if (cycles_since_jump < INT_MAX)
cycles_since_jump++;
+ /* Detect a comparison of a P register with zero. If we later
+ see a condjump based on it, we have found a null pointer
+ check. */
+ if (recog_memoized (insn) == CODE_FOR_compare_eq)
+ {
+ rtx src = SET_SRC (PATTERN (insn));
+ if (REG_P (XEXP (src, 0))
+ && P_REGNO_P (REGNO (XEXP (src, 0)))
+ && XEXP (src, 1) == const0_rtx)
+ {
+ np_check_regno = REGNO (XEXP (src, 0));
+ np_after_branch = false;
+ }
+ else
+ np_check_regno = -1;
+ }
+
if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
{
- if (trapping_loads_p (load_insn))
- delay_needed = 3;
+ if (trapping_loads_p (load_insn, np_check_regno,
+ np_after_branch))
+ delay_needed = 4;
}
else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
- delay_needed = 4;
+ delay_needed = 3;
- if (delay_needed > cycles_since_jump)
+ /* See if we need to forget about a null pointer comparison
+ we found earlier. */
+ if (recog_memoized (insn) != CODE_FOR_compare_eq)
{
- rtx pat;
- int num_clobbers;
- rtx *op = recog_data.operand;
-
- delay_needed -= cycles_since_jump;
-
- extract_insn (last_condjump);
- if (optimize_size)
+ note_stores (PATTERN (insn), note_np_check_stores, NULL);
+ if (np_check_regno != -1)
{
- pat = gen_cbranch_predicted_taken (op[0], op[1], op[2],
- op[3]);
- cycles_since_jump = INT_MAX;
+ if (find_regno_note (insn, REG_INC, np_check_regno))
+ np_check_regno = -1;
}
- else
- /* Do not adjust cycles_since_jump in this case, so that
- we'll increase the number of NOPs for a subsequent insn
- if necessary. */
- pat = gen_cbranch_with_nops (op[0], op[1], op[2], op[3],
- GEN_INT (delay_needed));
- PATTERN (last_condjump) = pat;
- INSN_CODE (last_condjump) = recog (pat, insn, &num_clobbers);
}
+
+ }
+
+ if (delay_needed > cycles_since_jump
+ && (delay_needed - cycles_since_jump) > delay_added)
+ {
+ rtx pat1;
+ int num_clobbers;
+ rtx *op = recog_data.operand;
+
+ delay_needed -= cycles_since_jump;
+
+ extract_insn (last_condjump);
+ if (optimize_size)
+ {
+ pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2],
+ op[3]);
+ cycles_since_jump = INT_MAX;
+ }
+ else
+ {
+ /* Do not adjust cycles_since_jump in this case, so that
+ we'll increase the number of NOPs for a subsequent insn
+ if necessary. */
+ pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3],
+ GEN_INT (delay_needed));
+ delay_added = delay_needed;
+ }
+ PATTERN (last_condjump) = pat1;
+ INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers);
+ }
+ if (CALL_P (insn))
+ {
+ cycles_since_jump = INT_MAX;
+ delay_added = 0;
}
}
+
/* Second pass: for predicted-true branches, see if anything at the
branch destination needs extra nops. */
- if (! ENABLE_WA_SPECULATIVE_SYNCS)
- return;
-
- if (! ENABLE_WA_RETS)
- return;
-
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
+ int cycles_since_jump;
if (JUMP_P (insn)
&& any_condjump_p (insn)
&& (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
{
rtx target = JUMP_LABEL (insn);
rtx label = target;
+ rtx next_tgt;
+
cycles_since_jump = 0;
- for (; target && cycles_since_jump < 3; target = NEXT_INSN (target))
+ for (; target && cycles_since_jump < 3; target = next_tgt)
{
rtx pat;
+ next_tgt = find_next_insn_start (target);
+
if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target))
continue;
|| GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
continue;
- if (INSN_P (target))
+ if (NONDEBUG_INSN_P (target))
{
+ rtx load_insn = find_load (target);
enum attr_type type = type_for_anomaly (target);
int delay_needed = 0;
if (cycles_since_jump < INT_MAX)
cycles_since_jump++;
- if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
+ if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
+ {
+ if (trapping_loads_p (load_insn, -1, false))
+ delay_needed = 2;
+ }
+ else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
delay_needed = 2;
if (delay_needed > cycles_since_jump)
}
}
}
+}
+
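+/* To illustrate the rewriting done above (a sketch): given a trapping
+ load directly in the shadow of a predicted-false branch,
+
+ if cc jump .L1;
+ r0 = [p0];
+
+ the branch becomes a cbranch_with_nops pattern that assembles as
+
+ if cc jump .L1;
+ nop; nop; nop;
+ r0 = [p0];
+
+ when optimizing for speed (delay_needed of 4, minus the one cycle
+ already separating branch and load), or a predicted-taken branch,
+ which needs no NOPs, when optimizing for size. */
+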
+/* Called just before the final scheduling pass. If we need to insert NOPs
+ later on to work around speculative loads, insert special placeholder
+ insns that cause loads to be delayed for as many cycles as necessary
+ (and possible). This reduces the number of NOPs we need to add.
+ The dummy insns we generate are later removed by bfin_gen_bundles. */
+static void
+add_sched_insns_for_speculation (void)
+{
+ rtx insn;
+
+ if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
+ && ! ENABLE_WA_INDIRECT_CALLS)
+ return;
+
+ /* First pass: find predicted-false branches; if something after them
+ needs nops, insert them or change the branch to predict true. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx pat;
+
+ if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
+ continue;
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+ || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ continue;
+
+ if (JUMP_P (insn))
+ {
+ if (any_condjump_p (insn)
+ && !cbranch_predicted_taken_p (insn))
+ {
+ rtx n = next_real_insn (insn);
+ emit_insn_before (gen_stall (GEN_INT (3)), n);
+ }
+ }
+ }
+
+ /* Second pass: for predicted-true branches, see if anything at the
+ branch destination needs extra nops. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (JUMP_P (insn)
+ && any_condjump_p (insn)
+ && (cbranch_predicted_taken_p (insn)))
+ {
+ rtx target = JUMP_LABEL (insn);
+ rtx next = next_real_insn (target);
+
+ if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
+ && get_attr_type (next) == TYPE_STALL)
+ continue;
+ emit_insn_before (gen_stall (GEN_INT (1)), next);
+ }
+ }
+}
+
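+/* A sketch of the placeholders inserted above:
+
+ if cc jump .L1;
+ (stall 3)
+
+ The second scheduling pass can move independent insns into the stall's
+ shadow; every cycle it manages to fill is one NOP fewer for
+ workaround_speculation to emit, and the placeholder itself is removed
+ again by bfin_gen_bundles. */
+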
+/* We use the machine specific reorg pass for emitting CSYNC instructions
+ after conditional branches as needed.
+
+ The Blackfin is unusual in that a code sequence like
+ if cc jump label
+ r0 = [p0]
+ may speculatively perform the load even if the condition isn't true. This
+ happens for a branch that is predicted not taken, because the pipeline
+ isn't flushed or stalled, so the early stages of the following instructions,
+ which perform the memory reference, are allowed to execute before the
+ jump condition is evaluated.
+ Therefore, we must insert additional instructions in all places where this
+ could lead to incorrect behavior. The manual recommends CSYNC, while
+ VDSP seems to use NOPs (even though its corresponding compiler option is
+ named CSYNC).
+
+ When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
+ When optimizing for size, we turn the branch into a predicted taken one.
+ This may be slower due to mispredicts, but saves code size. */
+
+static void
+bfin_reorg (void)
+{
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+
+ if (bfin_flag_schedule_insns2)
+ {
+ splitting_for_sched = 1;
+ split_all_insns ();
+ splitting_for_sched = 0;
+
+ add_sched_insns_for_speculation ();
+
+ timevar_push (TV_SCHED2);
+ if (flag_selective_scheduling2
+ && !maybe_skip_selective_scheduling ())
+ run_selective_scheduling ();
+ else
+ schedule_insns ();
+ timevar_pop (TV_SCHED2);
+
+ /* Examine the schedule and insert nops as necessary for 64-bit parallel
+ instructions. */
+ bfin_gen_bundles ();
+ }
+
+ df_analyze ();
+
+ /* Doloop optimization */
+ if (cfun->machine->has_hardware_loops)
+ bfin_reorg_loops (dump_file);
+
+ workaround_speculation ();
if (bfin_flag_var_tracking)
{
reorder_var_tracking_notes ();
timevar_pop (TV_VAR_TRACKING);
}
+
df_finish_pass (false);
+
+ workaround_rts_anomaly ();
}
\f
/* Handle interrupt_handler, exception_handler and nmi_handler function
if (TREE_CODE (x) != FUNCTION_TYPE)
{
- warning (OPT_Wattributes, "%qs attribute only applies to functions",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
*no_add_attrs = true;
}
else if (funkind (x) != SUBROUTINE)
&& TREE_CODE (*node) != FIELD_DECL
&& TREE_CODE (*node) != TYPE_DECL)
{
- warning (OPT_Wattributes, "`%s' attribute only applies to functions",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
*no_add_attrs = true;
}
if (TREE_CODE (decl) != FUNCTION_DECL)
{
- error ("`%s' attribute only applies to functions",
- IDENTIFIER_POINTER (name));
+ error ("%qE attribute only applies to functions",
+ name);
*no_add_attrs = true;
}
if (TREE_CODE (decl) != VAR_DECL)
{
- error ("`%s' attribute only applies to variables",
- IDENTIFIER_POINTER (name));
+ error ("%qE attribute only applies to variables",
+ name);
*no_add_attrs = true;
}
else if (current_function_decl != NULL_TREE
&& !TREE_STATIC (decl))
{
- error ("`%s' attribute cannot be specified for local variables",
- IDENTIFIER_POINTER (name));
+ error ("%qE attribute cannot be specified for local variables",
+ name);
*no_add_attrs = true;
}
else
return NULL_TREE;
}
+/* Handle a "l2" attribute; arguments as in struct attribute_spec.handler. */
+
+static tree
+bfin_handle_l2_attribute (tree *node, tree ARG_UNUSED (name),
+ tree ARG_UNUSED (args), int ARG_UNUSED (flags),
+ bool *no_add_attrs)
+{
+ tree decl = *node;
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ if (DECL_SECTION_NAME (decl) != NULL_TREE
+ && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
+ ".l2.text") != 0)
+ {
+ error ("section of %q+D conflicts with previous declaration",
+ decl);
+ *no_add_attrs = true;
+ }
+ else
+ DECL_SECTION_NAME (decl) = build_string (9, ".l2.text");
+ }
+ else if (TREE_CODE (decl) == VAR_DECL)
+ {
+ if (DECL_SECTION_NAME (decl) != NULL_TREE
+ && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
+ ".l2.data") != 0)
+ {
+ error ("section of %q+D conflicts with previous declaration",
+ decl);
+ *no_add_attrs = true;
+ }
+ else
+ DECL_SECTION_NAME (decl) = build_string (9, ".l2.data");
+ }
+
+ return NULL_TREE;
+}
+
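+/* Example usage of the attribute handled above (a sketch):
+
+ int buf[256] __attribute__ ((l2)); placed in .l2.data
+ void isr (void) __attribute__ ((l2)); placed in .l2.text
+
+ Combining "l2" with a section attribute naming a different section on
+ the same declaration is rejected with the errors above. */
+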
/* Table of valid machine attributes. */
-const struct attribute_spec bfin_attribute_table[] =
+static const struct attribute_spec bfin_attribute_table[] =
{
/* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
{ "interrupt_handler", 0, 0, false, true, true, handle_int_attribute },
{ "l1_data", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
{ "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
{ "l1_data_B", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
+ { "l2", 0, 0, true, false, false, bfin_handle_l2_attribute },
{ NULL, 0, 0, false, false, false, NULL }
};
\f
{
rtx xops[3];
/* The this parameter is passed as the first argument. */
- rtx this = gen_rtx_REG (Pmode, REG_R0);
+ rtx this_rtx = gen_rtx_REG (Pmode, REG_R0);
/* Adjust the this parameter by a fixed constant. */
if (delta)
{
- xops[1] = this;
+ xops[1] = this_rtx;
if (delta >= -64 && delta <= 63)
{
xops[0] = GEN_INT (delta);
output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops);
xops[0] = gen_rtx_MEM (Pmode, p2tmp);
}
- xops[2] = this;
+ xops[2] = this_rtx;
output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops);
}
BFIN_BUILTIN_CPLX_SQU,
+ BFIN_BUILTIN_LOADBYTES,
+
BFIN_BUILTIN_MAX
};
tree short_ftype_v2hi
= build_function_type_list (short_integer_type_node, V2HI_type_node,
NULL_TREE);
-
+ tree int_ftype_pint
+ = build_function_type_list (integer_type_node,
+ build_pointer_type (integer_type_node),
+ NULL_TREE);
+
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC);
def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC);
BFIN_BUILTIN_CPLX_MSU_16_S40);
def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi,
BFIN_BUILTIN_CPLX_SQU);
+
+ /* "Unaligned" load. */
+ def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint,
+ BFIN_BUILTIN_LOADBYTES);
+
}
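+
+/* A sketch of how the new builtin might be used from C; P need not be
+ word-aligned here (assuming the loadbytes pattern expands to an
+ alignment-exception-free load, as its name and the comment above
+ suggest):
+
+ int get_word (int *p)
+ {
+ return __builtin_bfin_loadbytes (p);
+ } */
+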
{ CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T },
{ CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE },
{ CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T },
- { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE }
+ { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE },
+
+ { CODE_FOR_mulhisi_ll, "__builtin_bfin_mulhisill", BFIN_BUILTIN_MULHISILL, -1 },
+ { CODE_FOR_mulhisi_lh, "__builtin_bfin_mulhisilh", BFIN_BUILTIN_MULHISILH, -1 },
+ { CODE_FOR_mulhisi_hl, "__builtin_bfin_mulhisihl", BFIN_BUILTIN_MULHISIHL, -1 },
+ { CODE_FOR_mulhisi_hh, "__builtin_bfin_mulhisihh", BFIN_BUILTIN_MULHISIHH, -1 }
};
static const struct builtin_description bdesc_1arg[] =
{
+ { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 },
+
{ CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 },
{ CODE_FOR_signbitshi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 },
if (! target
|| !register_operand (target, SImode))
target = gen_reg_rtx (SImode);
+ if (! register_operand (op0, SImode))
+ op0 = copy_to_mode_reg (SImode, op0);
+ if (! register_operand (op1, SImode))
+ op1 = copy_to_mode_reg (SImode, op1);
a1reg = gen_rtx_REG (PDImode, REG_A1);
a0reg = gen_rtx_REG (PDImode, REG_A0);
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
accvec = gen_reg_rtx (V2PDImode);
+ icode = CODE_FOR_flag_macv2hi_parts;
if (! target
|| GET_MODE (target) != V2HImode
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
accvec = gen_reg_rtx (V2PDImode);
+ icode = CODE_FOR_flag_macv2hi_parts;
if (! target
|| GET_MODE (target) != V2HImode
emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE)));
- emit_insn (gen_flag_mulhi_parts (tmp2, op0, op0, const0_rtx,
+ emit_insn (gen_flag_mulhi_parts (gen_lowpart (HImode, tmp2), op0, op0,
const0_rtx, const1_rtx,
GEN_INT (MACFLAG_NONE)));
- emit_insn (gen_ssaddhi3_parts (target, tmp2, tmp2, const1_rtx,
- const0_rtx, const0_rtx));
-
- emit_insn (gen_sssubhi3_parts (target, tmp1, tmp1, const0_rtx,
- const0_rtx, const1_rtx));
+ emit_insn (gen_ssaddhi3_high_parts (target, tmp2, tmp2, tmp2, const0_rtx,
+ const0_rtx));
+ emit_insn (gen_sssubhi3_low_parts (target, target, tmp1, tmp1,
+ const0_rtx, const1_rtx));
return target;
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE bfin_issue_rate
-#undef TARGET_PROMOTE_PROTOTYPES
-#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
-#undef TARGET_PROMOTE_FUNCTION_ARGS
-#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
-#undef TARGET_PROMOTE_FUNCTION_RETURN
-#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY bfin_return_in_memory
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P bfin_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED bfin_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE bfin_can_eliminate
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE bfin_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT bfin_trampoline_init
+
struct gcc_target targetm = TARGET_INITIALIZER;