/* Subroutines used for code generation on the Synopsys DesignWare ARC cpu.
- Copyright (C) 1994-2017 Free Software Foundation, Inc.
+ Copyright (C) 1994-2019 Free Software Foundation, Inc.
Sources derived from work done by Sankhya Technologies (www.sankhya.com) on
behalf of Synopsys Inc.
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
static char arc_cpu_name[10] = "";
static const char *arc_cpu_string = arc_cpu_name;
+typedef struct GTY (()) _arc_jli_section
+{
+ const char *name;
+ struct _arc_jli_section *next;
+} arc_jli_section;
+
+static arc_jli_section *arc_jli_sections = NULL;
+
+/* Track which regs are set fixed/call saved/call used from command line.  */
+HARD_REG_SET overrideregs;
+
/* Maximum size of a loop. */
#define ARC_MAX_LOOP_LENGTH 4095
-/* ??? Loads can handle any constant, stores can only handle small ones. */
-/* OTOH, LIMMs cost extra, so their usefulness is limited. */
-#define RTX_OK_FOR_OFFSET_P(MODE, X) \
-(GET_CODE (X) == CONST_INT \
- && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \
- (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \
- ? 0 \
- : -(-GET_MODE_SIZE (MODE) | -4) >> 1)))
-
-#define LEGITIMATE_SMALL_DATA_OFFSET_P(X) \
- (GET_CODE (X) == CONST \
- && GET_CODE (XEXP ((X), 0)) == PLUS \
- && GET_CODE (XEXP (XEXP ((X), 0), 0)) == SYMBOL_REF \
- && SYMBOL_REF_SMALL_P (XEXP (XEXP ((X), 0), 0)) \
- && GET_CODE (XEXP(XEXP ((X), 0), 1)) == CONST_INT \
- && INTVAL (XEXP (XEXP ((X), 0), 1)) <= g_switch_value)
-
-#define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \
- (GET_CODE (X) == PLUS \
- && REG_P (XEXP ((X), 0)) \
- && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM \
- && ((GET_CODE (XEXP ((X), 1)) == SYMBOL_REF \
- && SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \
- || LEGITIMATE_SMALL_DATA_OFFSET_P (XEXP ((X), 1))))
/* Check if an rtx fits in the store instruction format.  Loads can
   handle any constant.  The offset must be a multiple of the access
   size and within the signed range accepted by the store encoding;
   misaligned offsets are rejected (the inner conditional yields 0).  */
#define RTX_OK_FOR_OFFSET_P(MODE, X) \
  (GET_CODE (X) == CONST_INT \
   && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & (~0x03), \
		       (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \
			? 0 \
			: -(-GET_MODE_SIZE (MODE) | (~0x03)) >> 1)))
/* Array of valid operand punctuation characters. */
char arc_punct_chars[256];
|| get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
: get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
/* Start enter/leave register range.  */
#define ENTER_LEAVE_START_REG 13

/* End enter/leave register range.  */
#define ENTER_LEAVE_END_REG 26
+
/* The maximum number of insns skipped which will be conditionalised if
possible. */
/* When optimizing for speed:
static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
static tree arc_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+static tree arc_handle_jli_attribute (tree *, tree, tree, int, bool *);
+static tree arc_handle_secure_attribute (tree *, tree, tree, int, bool *);
+static tree arc_handle_uncached_attribute (tree *, tree, tree, int, bool *);
+static tree arc_handle_aux_attribute (tree *, tree, tree, int, bool *);
/* Initialized arc_attribute_table to NULL since arc doesnot have any
machine specific supported attributes. */
const struct attribute_spec arc_attribute_table[] =
{
- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
- affects_type_identity } */
- { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true },
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+ affects_type_identity, handler, exclude } */
+ { "interrupt", 1, 1, true, false, false, true,
+ arc_handle_interrupt_attribute, NULL },
/* Function calls made to this symbol must be done indirectly, because
it may lie outside of the 21/25 bit addressing range of a normal function
call. */
- { "long_call", 0, 0, false, true, true, NULL, false },
+ { "long_call", 0, 0, false, true, true, false, NULL, NULL },
/* Whereas these functions are always known to reside within the 25 bit
addressing range of unconditionalized bl. */
- { "medium_call", 0, 0, false, true, true, NULL, false },
+ { "medium_call", 0, 0, false, true, true, false, NULL, NULL },
/* And these functions are always known to reside within the 21 bit
addressing range of blcc. */
- { "short_call", 0, 0, false, true, true, NULL, false },
+ { "short_call", 0, 0, false, true, true, false, NULL, NULL },
/* Function which are not having the prologue and epilogue generated
by the compiler. */
- { "naked", 0, 0, true, false, false, arc_handle_fndecl_attribute, false },
- { NULL, 0, 0, false, false, false, NULL, false }
+ { "naked", 0, 0, true, false, false, false, arc_handle_fndecl_attribute,
+ NULL },
+ /* Functions calls made using jli instruction. The pointer in JLI
+ table is found latter. */
+ { "jli_always", 0, 0, false, true, true, false, NULL, NULL },
+ /* Functions calls made using jli instruction. The pointer in JLI
+ table is given as input parameter. */
+ { "jli_fixed", 1, 1, false, true, true, false, arc_handle_jli_attribute,
+ NULL },
+ /* Call a function using secure-mode. */
+ { "secure_call", 1, 1, false, true, true, false, arc_handle_secure_attribute,
+ NULL },
+ /* Bypass caches using .di flag. */
+ { "uncached", 0, 0, false, true, false, false, arc_handle_uncached_attribute,
+ NULL },
+ { "aux", 0, 1, true, false, false, false, arc_handle_aux_attribute, NULL },
+ { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
static int arc_comp_type_attributes (const_tree, const_tree);
static void arc_file_start (void);
/* Globally visible information about currently selected cpu. */
const arc_cpu_t *arc_selected_cpu;
+/* Given a symbol RTX (const (symb <+ const_int>), returns its
+ alignment. */
+
+static int
+get_symbol_alignment (rtx x)
+{
+ tree decl = NULL_TREE;
+ int align = 0;
+
+ switch (GET_CODE (x))
+ {
+ case SYMBOL_REF:
+ decl = SYMBOL_REF_DECL (x);
+ break;
+ case CONST:
+ return get_symbol_alignment (XEXP (x, 0));
+ case PLUS:
+ gcc_assert (CONST_INT_P (XEXP (x, 1)));
+ return get_symbol_alignment (XEXP (x, 0));
+ default:
+ return 0;
+ }
+
+ if (decl)
+ align = DECL_ALIGN (decl);
+ align = align / BITS_PER_UNIT;
+ return align;
+}
+
+/* Return true if x is ok to be used as a small data address. */
+
+static bool
+legitimate_small_data_address_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ return legitimate_small_data_address_p (XEXP (x, 0));
+ case SYMBOL_REF:
+ return SYMBOL_REF_SMALL_P (x);
+ case PLUS:
+ {
+ bool p0 = (GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
+ && SYMBOL_REF_SMALL_P (XEXP (x, 0));
+ bool p1 = CONST_INT_P (XEXP (x, 1))
+ && (INTVAL (XEXP (x, 1)) <= g_switch_value);
+ return p0 && p1;
+ }
+ default:
+ return false;
+ }
+}
+
+/* TRUE if op is a scaled address.  */
static bool
legitimate_scaled_address_p (machine_mode mode, rtx op, bool strict)
{
case 4:
if (INTVAL (XEXP (XEXP (op, 0), 1)) != 4)
return false;
+ /* Fall through. */
default:
return false;
}
return true;
return false;
}
+
+  /* Scaled addresses for sdata are handled in other places.  */
+ if (legitimate_small_data_address_p (op))
+ return false;
+
if (CONSTANT_P (XEXP (op, 1)))
- {
- /* Scalled addresses for sdata is done other places. */
- if (GET_CODE (XEXP (op, 1)) == SYMBOL_REF
- && SYMBOL_REF_SMALL_P (XEXP (op, 1)))
- return false;
return true;
- }
return false;
}
/* Implements target hook
TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
-static unsigned int
-arc_autovectorize_vector_sizes (void)
+static void
+arc_autovectorize_vector_sizes (vector_sizes *sizes)
+{
+ if (TARGET_PLUS_QMACW)
+ {
+ sizes->quick_push (8);
+ sizes->quick_push (4);
+ }
+}
+
+
+/* Implements target hook TARGET_SCHED_ISSUE_RATE. */
+static int
+arc_sched_issue_rate (void)
{
- return TARGET_PLUS_QMACW ? (8 | 4) : 0;
+ switch (arc_tune)
+ {
+ case TUNE_ARCHS4X:
+ case TUNE_ARCHS4XD:
+ return 3;
+ default:
+ break;
+ }
+ return 1;
}
/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */
const_tree, bool);
static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
-static void arc_finalize_pic (void);
-
/* initialize the GCC target structure. */
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE arc_sched_issue_rate
+
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
#define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline
-#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address
-
#define TARGET_CAN_ELIMINATE arc_can_eliminate
#define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required
#define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address
-#define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length
-
-#define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters
-
#undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
#define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P \
arc_no_speculation_in_delay_slots_p
#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
+#undef TARGET_HARD_REGNO_NREGS
+#define TARGET_HARD_REGNO_NREGS arc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arc_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arc_modes_tieable_p
+#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
+#define TARGET_BUILTIN_SETJMP_FRAME_VALUE arc_builtin_setjmp_frame_value
/* Try to keep the (mov:DF _, reg) as early as possible so
that the d<add/sub/mul>h-lr insns appear together and can
if (cl == DOUBLE_REGS)
return GENERAL_REGS;
- /* The loop counter register can be stored, but not loaded directly. */
- if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS)
- && in_p && MEM_P (x))
- return GENERAL_REGS;
-
/* If we have a subreg (reg), where reg is a pseudo (that will end in
a memory location), then we may need a scratch register to handle
the fp/sp+largeoffset address. */
if (regno != -1)
return NO_REGS;
- /* It is a pseudo that ends in a stack location. */
- if (reg_equiv_mem (REGNO (x)))
+ /* It is a pseudo that ends in a stack location. This
+ procedure only works with the old reload step. */
+ if (reg_equiv_mem (REGNO (x)) && !lra_in_progress)
{
/* Get the equivalent address and check the range of the
offset. */
if (arc_multcost < 0)
switch (arc_tune)
{
- case TUNE_ARC700_4_2_STD:
+ case ARC_TUNE_ARC700_4_2_STD:
/* latency 7;
max throughput (1 multiply + 4 other insns) / 5 cycles. */
arc_multcost = COSTS_N_INSNS (4);
if (TARGET_NOMPY_SET)
arc_multcost = COSTS_N_INSNS (30);
break;
- case TUNE_ARC700_4_2_XMAC:
+ case ARC_TUNE_ARC700_4_2_XMAC:
/* latency 5;
max throughput (1 multiply + 2 other insns) / 3 cycles. */
arc_multcost = COSTS_N_INSNS (3);
if (TARGET_NOMPY_SET)
arc_multcost = COSTS_N_INSNS (30);
break;
- case TUNE_ARC600:
+ case ARC_TUNE_ARC600:
if (TARGET_MUL64_SET)
{
arc_multcost = COSTS_N_INSNS (4);
/* MPY instructions valid only for ARC700 or ARCv2. */
if (TARGET_NOMPY_SET && TARGET_ARC600_FAMILY)
- error ("-mno-mpy supported only for ARC700 or ARCv2");
+ error ("%<-mno-mpy%> supported only for ARC700 or ARCv2");
if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR)
- error ("-mno-dpfp-lrsr supported only with -mdpfp");
+ error ("%<-mno-dpfp-lrsr%> supported only with %<-mdpfp%>");
/* FPX-1. No fast and compact together. */
if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET)
/* FPX-2. No fast-spfp for arc600 or arc601. */
if (TARGET_SPFP_FAST_SET && TARGET_ARC600_FAMILY)
- error ("-mspfp_fast not available on ARC600 or ARC601");
+ error ("%<-mspfp_fast%> not available on ARC600 or ARC601");
/* FPX-4. No FPX extensions mixed with FPU extensions. */
if ((TARGET_DPFP_FAST_SET || TARGET_DPFP_COMPACT_SET || TARGET_SPFP)
/* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic. */
if (flag_pic && TARGET_ARC600_FAMILY)
{
- warning (DK_WARNING,
+ warning (0,
"PIC is not supported for %s. Generating non-PIC code only..",
arc_cpu_string);
flag_pic = 0;
dash = strchr (str, '-');
if (!dash)
{
- warning (0, "value of -mirq-ctrl-saved must have form R0-REGx");
+ warning (OPT_mirq_ctrl_saved_, "missing dash");
return;
}
*dash = '\0';
first = decode_reg_name (str);
if (first != 0)
{
- warning (0, "first register must be R0");
+ warning (OPT_mirq_ctrl_saved_, "first register must be R0");
return;
}
if (last < 0)
{
- warning (0, "unknown register name: %s", dash + 1);
+ warning (OPT_mirq_ctrl_saved_, "unknown register name: %s", dash + 1);
return;
}
if (!(last & 0x01))
{
- warning (0, "last register name %s must be an odd register", dash + 1);
+ warning (OPT_mirq_ctrl_saved_,
+ "last register name %s must be an odd register", dash + 1);
return;
}
if (first > last)
{
- warning (0, "%s-%s is an empty range", str, dash + 1);
+ warning (OPT_mirq_ctrl_saved_,
+ "%s-%s is an empty range", str, dash + 1);
return;
}
break;
default:
- warning (0, "unknown register name: %s", str);
+ warning (OPT_mirq_ctrl_saved_,
+ "unknown register name: %s", str);
return;
}
}
if (errno != 0 || *arg == '\0' || *end_ptr != '\0'
|| (val != 0 && val != 4 && val != 8 && val != 16 && val != 32))
{
- error ("invalid number in -mrgf-banked-regs=%s "
+ error ("invalid number in %<-mrgf-banked-regs=%s%> "
"valid values are 0, 4, 8, 16, or 32", arg);
return;
}
if (TARGET_V2)
irq_range (opt->arg);
else
- warning (0, "option -mirq-ctrl-saved valid only for ARC v2 processors");
+ warning (OPT_mirq_ctrl_saved_,
+ "option %<-mirq-ctrl-saved%> valid only "
+ "for ARC v2 processors");
break;
case OPT_mrgf_banked_regs_:
if (TARGET_V2)
parse_mrgf_banked_regs_option (opt->arg);
else
- warning (0, "option -mrgf-banked-regs valid only for ARC v2 processors");
+ warning (OPT_mrgf_banked_regs_,
+ "option %<-mrgf-banked-regs%> valid only for "
+ "ARC v2 processors");
break;
default:
}
}
+ CLEAR_HARD_REG_SET (overrideregs);
+ if (common_deferred_options)
+ {
+ vec<cl_deferred_option> v =
+ *((vec<cl_deferred_option> *) common_deferred_options);
+ int reg, nregs, j;
+
+ FOR_EACH_VEC_ELT (v, i, opt)
+ {
+ switch (opt->opt_index)
+ {
+ case OPT_ffixed_:
+ case OPT_fcall_used_:
+ case OPT_fcall_saved_:
+ if ((reg = decode_reg_name_and_count (opt->arg, &nregs)) >= 0)
+ for (j = reg; j < reg + nregs; j++)
+ SET_HARD_REG_BIT (overrideregs, j);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ /* Check options against architecture options. Throw an error if
+ option is not allowed. Extra, check options against default
+ architecture/cpu flags and throw an warning if we find a
+ mismatch. */
+#define ARC_OPTX(NAME, CODE, VAR, VAL, DOC0, DOC1) \
+ do { \
+ if ((!(arc_selected_cpu->arch_info->flags & CODE)) \
+ && (VAR == VAL)) \
+ error ("Option %s=%s is not available for %s CPU.", \
+ DOC0, DOC1, arc_selected_cpu->name); \
+ if ((arc_selected_cpu->arch_info->dflags & CODE) \
+ && (VAR != DEFAULT_##VAR) \
+ && (VAR != VAL)) \
+ warning (0, "Option %s is ignored, the default value %s" \
+ " is considered for %s CPU.", DOC0, DOC1, \
+ arc_selected_cpu->name); \
+ } while (0);
+#define ARC_OPT(NAME, CODE, MASK, DOC) \
+ do { \
+ if ((!(arc_selected_cpu->arch_info->flags & CODE)) \
+ && (target_flags & MASK)) \
+ error ("Option %s is not available for %s CPU", \
+ DOC, arc_selected_cpu->name); \
+ if ((arc_selected_cpu->arch_info->dflags & CODE) \
+ && (target_flags_explicit & MASK) \
+ && (!(target_flags & MASK))) \
+ warning (0, "Unset option %s is ignored, it is always" \
+ " enabled for %s CPU.", DOC, \
+ arc_selected_cpu->name); \
+ } while (0);
+
+#include "arc-options.def"
+
+#undef ARC_OPTX
+#undef ARC_OPT
+
/* Set cpu flags accordingly to architecture/selected cpu. The cpu
specific flags are set in arc-common.c. The architecture forces
the default hardware configurations in, regardless what command
if (arc_selected_cpu->arch_info->dflags & CODE) \
target_flags |= MASK; \
} while (0);
-#define ARC_OPTX(NAME, CODE, VAR, VAL, DOC) \
+#define ARC_OPTX(NAME, CODE, VAR, VAL, DOC0, DOC1) \
do { \
if ((arc_selected_cpu->flags & CODE) \
&& (VAR == DEFAULT_##VAR)) \
#undef ARC_OPTX
#undef ARC_OPT
- /* Check options against architecture options. Throw an error if
- option is not allowed. */
-#define ARC_OPTX(NAME, CODE, VAR, VAL, DOC) \
- do { \
- if ((VAR == VAL) \
- && (!(arc_selected_cpu->arch_info->flags & CODE))) \
- { \
- error ("%s is not available for %s architecture", \
- DOC, arc_selected_cpu->arch_info->name); \
- } \
- } while (0);
-#define ARC_OPT(NAME, CODE, MASK, DOC) \
- do { \
- if ((target_flags & MASK) \
- && (!(arc_selected_cpu->arch_info->flags & CODE))) \
- error ("%s is not available for %s architecture", \
- DOC, arc_selected_cpu->arch_info->name); \
- } while (0);
-
-#include "arc-options.def"
-
-#undef ARC_OPTX
-#undef ARC_OPT
+ /* Set extras. */
+ switch (arc_selected_cpu->extra)
+ {
+ case HAS_LPCOUNT_16:
+ arc_lpcwidth = 16;
+ break;
+ default:
+ break;
+ }
/* Set Tune option. */
- if (arc_tune == TUNE_NONE)
- arc_tune = (enum attr_tune) arc_selected_cpu->tune;
+ if (arc_tune == ARC_TUNE_NONE)
+ arc_tune = (enum arc_tune_attr) arc_selected_cpu->tune;
if (arc_size_opt_level == 3)
optimize_size = 1;
- /* Compact casesi is not a valid option for ARCv2 family. */
- if (TARGET_V2)
- {
- if (TARGET_COMPACT_CASESI)
- {
- warning (0, "compact-casesi is not applicable to ARCv2");
- TARGET_COMPACT_CASESI = 0;
- }
- }
- else if (optimize_size == 1
- && !global_options_set.x_TARGET_COMPACT_CASESI)
- TARGET_COMPACT_CASESI = 1;
-
if (flag_pic)
target_flags |= MASK_NO_SDATA_SET;
if (flag_no_common == 255)
flag_no_common = !TARGET_NO_SDATA_SET;
- /* TARGET_COMPACT_CASESI needs the "q" register class. */
if (TARGET_MIXED_CODE)
TARGET_Q_CLASS = 1;
- if (!TARGET_Q_CLASS)
- TARGET_COMPACT_CASESI = 0;
- if (TARGET_COMPACT_CASESI)
- TARGET_CASE_VECTOR_PC_RELATIVE = 1;
/* Check for small data option */
if (!global_options_set.x_g_switch_value && !TARGET_NO_SDATA_SET)
g_switch_value = TARGET_LL64 ? 8 : 4;
+ /* A7 has an issue with delay slots. */
+ if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
+ flag_delayed_branch = 0;
+
+  /* Millicode thunks do not work with long calls.  */
+ if (TARGET_LONG_CALLS_SET)
+ target_flags &= ~MASK_MILLICODE_THUNK_SET;
+
+ /* Set unaligned to all HS cpus. */
+ if (!global_options_set.x_unaligned_access && TARGET_HS)
+ unaligned_access = 1;
+
/* These need to be done at start up. It's convenient to do them here. */
arc_init ();
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.  No preference adjustments
   are needed; reload may use class CL as-is.  */
enum reg_class
arc_preferred_reload_class (rtx, enum reg_class cl)
{
  return cl;
}
/* For ARCv2 the core register set is changed. */
strcpy (rname29, "ilink");
strcpy (rname30, "r30");
- call_used_regs[30] = 1;
- fixed_regs[30] = 0;
-
- arc_regno_reg_class[30] = WRITABLE_CORE_REGS;
- SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], 30);
- SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], 30);
- SET_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], 30);
- SET_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], 30);
+
+ if (!TEST_HARD_REG_BIT (overrideregs, R30_REG))
+ {
+ /* No user interference. Set the r30 to be used by the
+ compiler. */
+ call_used_regs[R30_REG] = 1;
+ fixed_regs[R30_REG] = 0;
+
+ arc_regno_reg_class[R30_REG] = GENERAL_REGS;
+ }
}
if (TARGET_MUL64_SET)
{
- fix_start = 57;
- fix_end = 59;
+ fix_start = R57_REG;
+ fix_end = R59_REG;
/* We don't provide a name for mmed. In rtl / assembly resource lists,
you are supposed to refer to it as mlo & mhi, e.g
if (TARGET_MULMAC_32BY16_SET)
{
- fix_start = 56;
- fix_end = fix_end > 57 ? fix_end : 57;
+ fix_start = MUL32x16_REG;
+ fix_end = fix_end > R57_REG ? fix_end : R57_REG;
strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
}
reg_alloc_order [i] = i;
}
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (!call_used_regs[regno])
- CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
- for (regno = 32; regno < 60; regno++)
- if (!fixed_regs[regno])
- SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno);
- if (!TARGET_ARC600_FAMILY)
+ /* Reduced configuration: don't use r4-r9, r16-r25. */
+ if (TARGET_RF16)
{
- for (regno = 32; regno <= 60; regno++)
- CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno);
-
- /* If they have used -ffixed-lp_count, make sure it takes
- effect. */
- if (fixed_regs[LP_COUNT])
- {
- CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT);
- CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
- CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT);
-
- /* Instead of taking out SF_MODE like below, forbid it outright. */
- arc_hard_regno_modes[60] = 0;
- }
- else
- arc_hard_regno_modes[60] = 1 << (int) S_MODE;
+ for (i = R4_REG; i <= R9_REG; i++)
+ fixed_regs[i] = call_used_regs[i] = 1;
+ for (i = R16_REG; i <= R25_REG; i++)
+ fixed_regs[i] = call_used_regs[i] = 1;
}
/* ARCHS has 64-bit data-path which makes use of the even-odd paired
registers. */
if (TARGET_HS)
- {
- for (regno = 1; regno < 32; regno +=2)
- {
- arc_hard_regno_modes[regno] = S_MODES;
- }
- }
+ for (regno = R1_REG; regno < R32_REG; regno +=2)
+ arc_hard_regno_modes[regno] = S_MODES;
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
- {
- if (i < 29)
- {
- if ((TARGET_Q_CLASS || TARGET_RRQ_CLASS)
- && ((i <= 3) || ((i >= 12) && (i <= 15))))
- arc_regno_reg_class[i] = ARCOMPACT16_REGS;
- else
- arc_regno_reg_class[i] = GENERAL_REGS;
- }
- else if (i < 60)
- arc_regno_reg_class[i]
- = (fixed_regs[i]
- ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)
- ? CHEAP_CORE_REGS : ALL_CORE_REGS)
- : (((!TARGET_ARC600_FAMILY)
- && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i))
- ? CHEAP_CORE_REGS : WRITABLE_CORE_REGS));
- else
- arc_regno_reg_class[i] = NO_REGS;
- }
-
- /* ARCOMPACT16_REGS is empty, if TARGET_Q_CLASS / TARGET_RRQ_CLASS
- has not been activated. */
- if (!TARGET_Q_CLASS && !TARGET_RRQ_CLASS)
- CLEAR_HARD_REG_SET(reg_class_contents [ARCOMPACT16_REGS]);
- if (!TARGET_Q_CLASS)
- CLEAR_HARD_REG_SET(reg_class_contents [AC16_BASE_REGS]);
-
- gcc_assert (FIRST_PSEUDO_REGISTER >= 144);
+ if (i < ILINK1_REG)
+ {
+ if ((TARGET_Q_CLASS || TARGET_RRQ_CLASS)
+ && ((i <= R3_REG) || ((i >= R12_REG) && (i <= R15_REG))))
+ arc_regno_reg_class[i] = ARCOMPACT16_REGS;
+ else
+ arc_regno_reg_class[i] = GENERAL_REGS;
+ }
+ else if (i < LP_COUNT)
+ arc_regno_reg_class[i] = GENERAL_REGS;
+ else
+ arc_regno_reg_class[i] = NO_REGS;
/* Handle Special Registers. */
- arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register. */
- if (!TARGET_V2)
- arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register. */
- arc_regno_reg_class[31] = LINK_REGS; /* blink register. */
- arc_regno_reg_class[60] = LPCOUNT_REG;
- arc_regno_reg_class[61] = NO_REGS; /* CC_REG: must be NO_REGS. */
+ arc_regno_reg_class[CC_REG] = NO_REGS; /* CC_REG: must be NO_REGS. */
arc_regno_reg_class[62] = GENERAL_REGS;
if (TARGET_DPFP)
- {
- for (i = 40; i < 44; ++i)
- {
- arc_regno_reg_class[i] = DOUBLE_REGS;
-
- /* Unless they want us to do 'mov d1, 0x00000000' make sure
- no attempt is made to use such a register as a destination
- operand in *movdf_insn. */
- if (!TARGET_ARGONAUT_SET)
- {
- /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is
- interpreted to mean they can use D1 or D2 in their insn. */
- CLEAR_HARD_REG_BIT(reg_class_contents[CHEAP_CORE_REGS ], i);
- CLEAR_HARD_REG_BIT(reg_class_contents[ALL_CORE_REGS ], i);
- CLEAR_HARD_REG_BIT(reg_class_contents[WRITABLE_CORE_REGS ], i);
- CLEAR_HARD_REG_BIT(reg_class_contents[MPY_WRITABLE_CORE_REGS], i);
- }
- }
- }
+ for (i = R40_REG; i < R44_REG; ++i)
+ {
+ arc_regno_reg_class[i] = DOUBLE_REGS;
+ if (!TARGET_ARGONAUT_SET)
+ CLEAR_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i);
+ }
else
{
- /* Disable all DOUBLE_REGISTER settings,
- if not generating DPFP code. */
- arc_regno_reg_class[40] = ALL_REGS;
- arc_regno_reg_class[41] = ALL_REGS;
- arc_regno_reg_class[42] = ALL_REGS;
- arc_regno_reg_class[43] = ALL_REGS;
+ /* Disable all DOUBLE_REGISTER settings, if not generating DPFP
+ code. */
+ arc_regno_reg_class[R40_REG] = ALL_REGS;
+ arc_regno_reg_class[R41_REG] = ALL_REGS;
+ arc_regno_reg_class[R42_REG] = ALL_REGS;
+ arc_regno_reg_class[R43_REG] = ALL_REGS;
- fixed_regs[40] = 1;
- fixed_regs[41] = 1;
- fixed_regs[42] = 1;
- fixed_regs[43] = 1;
+ fixed_regs[R40_REG] = 1;
+ fixed_regs[R41_REG] = 1;
+ fixed_regs[R42_REG] = 1;
+ fixed_regs[R43_REG] = 1;
- arc_hard_regno_modes[40] = 0;
- arc_hard_regno_modes[42] = 0;
-
- CLEAR_HARD_REG_SET(reg_class_contents [DOUBLE_REGS]);
+ arc_hard_regno_modes[R40_REG] = 0;
+ arc_hard_regno_modes[R42_REG] = 0;
}
if (TARGET_SIMD_SET)
}
/* pc : r63 */
- arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS;
+ arc_regno_reg_class[PCL_REG] = NO_REGS;
/*ARCV2 Accumulator. */
if ((TARGET_V2
&& (TARGET_FP_DP_FUSED || TARGET_FP_SP_FUSED))
|| TARGET_PLUS_DMPY)
{
- arc_regno_reg_class[ACCL_REGNO] = WRITABLE_CORE_REGS;
- arc_regno_reg_class[ACCH_REGNO] = WRITABLE_CORE_REGS;
- SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCL_REGNO);
- SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCH_REGNO);
- SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCL_REGNO);
- SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCH_REGNO);
- SET_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], ACCL_REGNO);
- SET_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], ACCH_REGNO);
- SET_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], ACCL_REGNO);
- SET_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], ACCH_REGNO);
-
- /* Allow the compiler to freely use them. */
- fixed_regs[ACCL_REGNO] = 0;
- fixed_regs[ACCH_REGNO] = 0;
-
- arc_hard_regno_modes[ACC_REG_FIRST] = D_MODES;
+ arc_regno_reg_class[ACCL_REGNO] = GENERAL_REGS;
+ arc_regno_reg_class[ACCH_REGNO] = GENERAL_REGS;
+
+ /* Allow the compiler to freely use them. */
+ if (!TEST_HARD_REG_BIT (overrideregs, ACCL_REGNO))
+ fixed_regs[ACCL_REGNO] = 0;
+ if (!TEST_HARD_REG_BIT (overrideregs, ACCH_REGNO))
+ fixed_regs[ACCH_REGNO] = 0;
+
+ if (!fixed_regs[ACCH_REGNO] && !fixed_regs[ACCL_REGNO])
+ arc_hard_regno_modes[ACC_REG_FIRST] = D_MODES;
}
}
+/* Implement TARGET_HARD_REGNO_NREGS. */
+
+static unsigned int
+arc_hard_regno_nregs (unsigned int regno, machine_mode mode)
+{
+ if (GET_MODE_SIZE (mode) == 16
+ && regno >= ARC_FIRST_SIMD_VR_REG
+ && regno <= ARC_LAST_SIMD_VR_REG)
+ return 1;
+
+ return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
+}
+
/* Implement TARGET_HARD_REGNO_MODE_OK. */
static bool
return 1;
}
-/* Set the default attributes for TYPE. */
-
-void
-arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED)
-{
- gcc_unreachable();
-}
-
/* Misc. utilities. */
/* X and Y are two things to compare using CODE. Emit the compare insn and
/* Provide the costs of an addressing mode that contains ADDR.
If ADDR is not a valid address, its cost is irrelevant. */
-int
+static int
arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
{
switch (GET_CODE (addr))
unsigned int args_size; /* # bytes that outgoing arguments take up. */
unsigned int reg_size; /* # bytes needed to store regs. */
unsigned int var_size; /* # bytes that variables take up. */
- unsigned int reg_offset; /* Offset from new sp to store regs. */
unsigned int gmask; /* Mask of saved gp registers. */
- int initialized; /* Nonzero if frame size already calculated. */
+  bool initialized; /* TRUE if frame size already calculated.  */
short millicode_start_reg;
short millicode_end_reg;
bool save_return_addr;
struct arc_frame_info frame_info;
/* To keep track of unalignment caused by short insns. */
int unalign;
- int force_short_suffix; /* Used when disgorging return delay slot insns. */
- const char *size_reason;
struct arc_ccfsm ccfsm_current;
/* Map from uid to ccfsm state during branch shortening. */
rtx ccfsm_current_insn;
Addition for pic: The gp register needs to be saved if the current
function changes it to access gotoff variables.
FIXME: This will not be needed if we used some arbitrary register
- instead of r26.
-*/
+ instead of r26. */
static bool
arc_must_save_register (int regno, struct function *func)
&& !firq_auto_save_p)
return true;
- if (flag_pic && crtl->uses_pic_offset_table
- && regno == PIC_OFFSET_TABLE_REGNUM)
- return true;
-
return false;
}
/* Helper function to wrap FRAME_POINTER_NEEDED. We do this as
FRAME_POINTER_NEEDED will not be true until the IRA (Integrated
Register Allocator) pass, while we want to get the frame size
- correct earlier than the IRA pass. */
+ correct earlier than the IRA pass.
+
+ When a function uses eh_return we must ensure that the fp register
+ is saved and then restored so that the unwinder can restore the
+ correct value for the frame we are going to jump to.
+
+ To do this we force all frames that call eh_return to require a
+ frame pointer (see arc_frame_pointer_required), this
+ will ensure that the previous frame pointer is stored on entry to
+ the function, and will then be reloaded at function exit.
+
+ As the frame pointer is handled as a special case in our prologue
+ and epilogue code it must not be saved and restored using the
+ MUST_SAVE_REGISTER mechanism otherwise we run into issues where GCC
+ believes that the function is not using a frame pointer and that
+ the value in the fp register is the frame pointer, while the
+ prologue and epilogue are busy saving and restoring the fp
+ register.
+
+ During compilation of a function the frame size is evaluated
+   multiple times, it is not until the reload pass is complete that the
+ frame size is considered fixed (it is at this point that space for
+ all spills has been allocated). However the frame_pointer_needed
+ variable is not set true until the register allocation pass, as a
+ result in the early stages the frame size does not include space
+ for the frame pointer to be spilled.
+
+ The problem that this causes is that the rtl generated for
+ EH_RETURN_HANDLER_RTX uses the details of the frame size to compute
+ the offset from the frame pointer at which the return address
+ lives. However, in early passes GCC has not yet realised we need a
+ frame pointer, and so has not included space for the frame pointer
+ in the frame size, and so gets the offset of the return address
+ wrong. This should not be an issue as in later passes GCC has
+ realised that the frame pointer needs to be spilled, and has
+ increased the frame size. However, the rtl for the
+ EH_RETURN_HANDLER_RTX is not regenerated to use the newer, larger
+ offset, and the wrong smaller offset is used. */
+
static bool
arc_frame_pointer_needed (void)
{
- return (frame_pointer_needed);
+ return (frame_pointer_needed || crtl->calls_eh_return);
}
-
/* Return non-zero if there are registers to be saved or loaded using
millicode thunks. We can only use consecutive sequences starting
with r13, and not going beyond r25.
return 0;
}
-/* Return the bytes needed to compute the frame pointer from the current
- stack pointer.
+/* Return the bytes needed to compute the frame pointer from the
+ current stack pointer. */
- SIZE is the size needed for local variables. */
-
-unsigned int
-arc_compute_frame_size (int size) /* size = # of var. bytes allocated. */
+static unsigned int
+arc_compute_frame_size (void)
{
int regno;
unsigned int total_size, var_size, args_size, pretend_size, extra_size;
- unsigned int reg_size, reg_offset;
+ unsigned int reg_size;
unsigned int gmask;
- struct arc_frame_info *frame_info = &cfun->machine->frame_info;
+ struct arc_frame_info *frame_info;
+ int size;
+ unsigned int extra_plus_reg_size;
+ unsigned int extra_plus_reg_size_aligned;
- size = ARC_STACK_ALIGN (size);
+ /* The answer might already be known. */
+ if (cfun->machine->frame_info.initialized)
+ return cfun->machine->frame_info.total_size;
- /* 1) Size of locals and temporaries */
+ frame_info = &cfun->machine->frame_info;
+ size = ARC_STACK_ALIGN (get_frame_size ());
+
+ /* 1) Size of locals and temporaries. */
var_size = size;
- /* 2) Size of outgoing arguments */
+ /* 2) Size of outgoing arguments. */
args_size = crtl->outgoing_args_size;
/* 3) Calculate space needed for saved registers.
}
}
- /* 4) Space for back trace data structure.
- <return addr reg size> (if required) + <fp size> (if required). */
- frame_info->save_return_addr
- = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM));
- /* Saving blink reg in case of leaf function for millicode thunk calls. */
- if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET)
+ /* In a frame that calls __builtin_eh_return two data registers are
+ used to pass values back to the exception handler.
+
+ Ensure that these registers are spilled to the stack so that the
+ exception throw code can find them, and update the saved values.
+ The handling code will then consume these reloaded values to
+ handle the exception. */
+ if (crtl->calls_eh_return)
+ for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
+ {
+ reg_size += UNITS_PER_WORD;
+ gmask |= 1L << regno;
+ }
+
+ /* Check if we need to save the return address. */
+ frame_info->save_return_addr = (!crtl->is_leaf
+ || df_regs_ever_live_p (RETURN_ADDR_REGNUM)
+ || crtl->calls_eh_return);
+
+ /* Saving blink reg for millicode thunk calls. */
+ if (TARGET_MILLICODE_THUNK_SET
+ && !crtl->calls_eh_return)
{
if (arc_compute_millicode_save_restore_regs (gmask, frame_info))
frame_info->save_return_addr = true;
}
+ /* 4) Calculate extra size made up of the blink + fp size. */
extra_size = 0;
if (arc_must_save_return_addr (cfun))
extra_size = 4;
pretend_size = crtl->args.pretend_args_size;
/* Ensure everything before the locals is aligned appropriately. */
- {
- unsigned int extra_plus_reg_size;
- unsigned int extra_plus_reg_size_aligned;
-
- extra_plus_reg_size = extra_size + reg_size;
- extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size);
- reg_size = extra_plus_reg_size_aligned - extra_size;
- }
+ extra_plus_reg_size = extra_size + reg_size;
+ extra_plus_reg_size_aligned = ARC_STACK_ALIGN (extra_plus_reg_size);
+ reg_size = extra_plus_reg_size_aligned - extra_size;
/* Compute total frame size. */
total_size = var_size + args_size + extra_size + pretend_size + reg_size;
- total_size = ARC_STACK_ALIGN (total_size);
-
- /* Compute offset of register save area from stack pointer:
- Frame: pretend_size <blink> reg_size <fp> var_size args_size <--sp
- */
- reg_offset = (total_size - (pretend_size + reg_size + extra_size)
- + (arc_frame_pointer_needed () ? 4 : 0));
+ /* It used to be the case that the alignment was forced at this
+ point. However, that is dangerous, calculations based on
+ total_size would be wrong. Given that this has never cropped up
+ as an issue I've changed this to an assert for now. */
+ gcc_assert (total_size == ARC_STACK_ALIGN (total_size));
/* Save computed information. */
frame_info->total_size = total_size;
frame_info->var_size = var_size;
frame_info->args_size = args_size;
frame_info->reg_size = reg_size;
- frame_info->reg_offset = reg_offset;
frame_info->gmask = gmask;
frame_info->initialized = reload_completed;
return total_size;
}
-/* Common code to save/restore registers. */
-/* BASE_REG is the base register to use for addressing and to adjust.
- GMASK is a bitmask of general purpose registers to save/restore.
- epilogue_p 0: prologue 1:epilogue 2:epilogue, sibling thunk
- If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably
- using a pre-modify for the first memory access. *FIRST_OFFSET is then
- zeroed. */
-
-static void
-arc_save_restore (rtx base_reg,
- unsigned int gmask, int epilogue_p, int *first_offset)
-{
- unsigned int offset = 0;
- int regno;
- struct arc_frame_info *frame = &cfun->machine->frame_info;
- rtx sibthunk_insn = NULL_RTX;
-
- if (gmask)
- {
- /* Millicode thunks implementation:
- Generates calls to millicodes for registers starting from r13 to r25
- Present Limitations:
- - Only one range supported. The remaining regs will have the ordinary
- st and ld instructions for store and loads. Hence a gmask asking
- to store r13-14, r16-r25 will only generate calls to store and
- load r13 to r14 while store and load insns will be generated for
- r16 to r25 in the prologue and epilogue respectively.
-
- - Presently library only supports register ranges starting from r13.
- */
- if (epilogue_p == 2 || frame->millicode_end_reg > 14)
- {
- int start_call = frame->millicode_start_reg;
- int end_call = frame->millicode_end_reg;
- int n_regs = end_call - start_call + 1;
- int i = 0, r, off = 0;
- rtx insn;
- rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
-
- if (*first_offset)
- {
- /* "reg_size" won't be more than 127 . */
- gcc_assert (epilogue_p || abs (*first_offset) <= 127);
- frame_add (base_reg, *first_offset);
- *first_offset = 0;
- }
- insn = gen_rtx_PARALLEL
- (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1));
- if (epilogue_p == 2)
- i += 2;
- else
- XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr);
- for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++)
- {
- rtx reg = gen_rtx_REG (SImode, r);
- rtx mem
- = gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off));
-
- if (epilogue_p)
- XVECEXP (insn, 0, i) = gen_rtx_SET (reg, mem);
- else
- XVECEXP (insn, 0, i) = gen_rtx_SET (mem, reg);
- gmask = gmask & ~(1L << r);
- }
- if (epilogue_p == 2)
- sibthunk_insn = insn;
- else
- {
- insn = frame_insn (insn);
- if (epilogue_p)
- for (r = start_call; r <= end_call; r++)
- {
- rtx reg = gen_rtx_REG (SImode, r);
- add_reg_note (insn, REG_CFA_RESTORE, reg);
- }
- }
- offset += off;
- }
-
- for (regno = 0; regno <= 31; regno++)
- {
- machine_mode mode = SImode;
- bool found = false;
-
- if (TARGET_LL64
- && (regno % 2 == 0)
- && ((gmask & (1L << regno)) != 0)
- && ((gmask & (1L << (regno+1))) != 0))
- {
- found = true;
- mode = DImode;
- }
- else if ((gmask & (1L << regno)) != 0)
- {
- found = true;
- mode = SImode;
- }
-
- if (found)
- {
- rtx reg = gen_rtx_REG (mode, regno);
- rtx addr, mem;
- int cfa_adjust = *first_offset;
-
- if (*first_offset)
- {
- gcc_assert (!offset);
- addr = plus_constant (Pmode, base_reg, *first_offset);
- addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr);
- *first_offset = 0;
- }
- else
- {
- gcc_assert (SMALL_INT (offset));
- addr = plus_constant (Pmode, base_reg, offset);
- }
- mem = gen_frame_mem (mode, addr);
- if (epilogue_p)
- {
- rtx insn =
- frame_move_inc (reg, mem, base_reg, addr);
- add_reg_note (insn, REG_CFA_RESTORE, reg);
- if (cfa_adjust)
- {
- enum reg_note note = REG_CFA_ADJUST_CFA;
- add_reg_note (insn, note,
- gen_rtx_SET (stack_pointer_rtx,
- plus_constant (Pmode,
- stack_pointer_rtx,
- cfa_adjust)));
- }
- }
- else
- frame_move_inc (mem, reg, base_reg, addr);
- offset += UNITS_PER_WORD;
- if (mode == DImode)
- {
- offset += UNITS_PER_WORD;
- ++regno;
- }
- } /* if */
- } /* for */
- }/* if */
- if (sibthunk_insn)
- {
- int start_call = frame->millicode_start_reg;
- int end_call = frame->millicode_end_reg;
- int r;
-
- rtx r12 = gen_rtx_REG (Pmode, 12);
-
- frame_insn (gen_rtx_SET (r12, GEN_INT (offset)));
- XVECEXP (sibthunk_insn, 0, 0) = ret_rtx;
- XVECEXP (sibthunk_insn, 0, 1)
- = gen_rtx_SET (stack_pointer_rtx,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
- sibthunk_insn = emit_jump_insn (sibthunk_insn);
- RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
-
- /* Would be nice if we could do this earlier, when the PARALLEL
- is populated, but these need to be attached after the
- emit. */
- for (r = start_call; r <= end_call; r++)
- {
- rtx reg = gen_rtx_REG (SImode, r);
- add_reg_note (sibthunk_insn, REG_CFA_RESTORE, reg);
- }
- }
-} /* arc_save_restore */
-
/* Build dwarf information when the context is saved via AUX_IRQ_CTRL
mechanism. */
RTX_FRAME_RELATED_P (insn) = 1;
}
-/* Set up the stack and frame pointer (if desired) for the function. */
+/* Helper for prologue: emit frame store with pre_modify or pre_dec to
+ save register REG on stack. An initial offset OFFSET can be passed
+ to the function. */
-void
-arc_expand_prologue (void)
+static int
+frame_save_reg (rtx reg, HOST_WIDE_INT offset)
{
- int size = get_frame_size ();
- unsigned int gmask = cfun->machine->frame_info.gmask;
- /* unsigned int frame_pointer_offset;*/
- unsigned int frame_size_to_allocate;
- /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13.
- Change the stack layout so that we rather store a high register with the
- PRE_MODIFY, thus enabling more short insn generation.) */
- int first_offset = 0;
- unsigned int fn_type = arc_compute_function_type (cfun);
+ rtx addr;
- /* Naked functions don't have prologue. */
- if (ARC_NAKED_P (fn_type))
- return;
+ if (offset)
+ {
+ rtx tmp = plus_constant (Pmode, stack_pointer_rtx,
+ offset - GET_MODE_SIZE (GET_MODE (reg)));
+ addr = gen_frame_mem (GET_MODE (reg),
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ tmp));
+ }
+ else
+ addr = gen_frame_mem (GET_MODE (reg), gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx));
+ frame_move_inc (addr, reg, stack_pointer_rtx, 0);
- size = ARC_STACK_ALIGN (size);
+ return GET_MODE_SIZE (GET_MODE (reg)) - offset;
+}
- /* Compute/get total frame size. */
- size = (!cfun->machine->frame_info.initialized
- ? arc_compute_frame_size (size)
- : cfun->machine->frame_info.total_size);
+/* Helper for epilogue: emit frame load with post_modify or post_inc
+ to restore register REG from stack. The initial offset is passed
+ via OFFSET. */
- if (flag_stack_usage_info)
- current_function_static_stack_size = size;
+static int
+frame_restore_reg (rtx reg, HOST_WIDE_INT offset)
+{
+ rtx addr, insn;
- /* Keep track of frame size to be allocated. */
- frame_size_to_allocate = size;
+ if (offset)
+ {
+ rtx tmp = plus_constant (Pmode, stack_pointer_rtx,
+ offset + GET_MODE_SIZE (GET_MODE (reg)));
+ addr = gen_frame_mem (GET_MODE (reg),
+ gen_rtx_POST_MODIFY (Pmode,
+ stack_pointer_rtx,
+ tmp));
+ }
+ else
+ addr = gen_frame_mem (GET_MODE (reg), gen_rtx_POST_INC (Pmode,
+ stack_pointer_rtx));
+ insn = frame_move_inc (reg, addr, stack_pointer_rtx, 0);
+ add_reg_note (insn, REG_CFA_RESTORE, reg);
+
+ if (reg == frame_pointer_rtx)
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (Pmode, stack_pointer_rtx,
+ GET_MODE_SIZE (GET_MODE (reg)) + offset));
+ else
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ gen_rtx_SET (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+ GET_MODE_SIZE (GET_MODE (reg))
+ + offset)));
- /* These cases shouldn't happen. Catch them now. */
- gcc_assert (!(size == 0 && gmask));
+ return GET_MODE_SIZE (GET_MODE (reg)) + offset;
+}
- /* Allocate space for register arguments if this is a variadic function. */
- if (cfun->machine->frame_info.pretend_size != 0)
+/* Check if we have a continuous range to be saved/restored with the
+ help of enter/leave instructions. A valid register range starts
+ from $r13 and is up to (including) $r26. */
+
+static bool
+arc_enter_leave_p (unsigned int gmask)
+{
+ int regno;
+ unsigned int rmask = 0;
+
+ if (!gmask)
+ return false;
+
+ for (regno = ENTER_LEAVE_START_REG;
+ regno <= ENTER_LEAVE_END_REG && (gmask & (1L << regno)); regno++)
+ rmask |= 1L << regno;
+
+ if (rmask ^ gmask)
+ return false;
+
+ return true;
+}
+
+/* ARC's prologue, save any needed call-saved regs (and call-used if
+ this is an interrupt handler) for ARCompact ISA, using ST/STD
+ instructions. */
+
+static int
+arc_save_callee_saves (unsigned int gmask,
+ bool save_blink,
+ bool save_fp,
+ HOST_WIDE_INT offset)
+{
+ rtx reg;
+ int frame_allocated = 0;
+
+ /* The home-grown ABI says link register is saved first. */
+ if (save_blink)
{
- /* Ensure pretend_size is maximum of 8 * word_size. */
- gcc_assert (cfun->machine->frame_info.pretend_size <= 32);
+ reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ frame_allocated += frame_save_reg (reg, offset);
+ offset = 0;
+ }
+
+ /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask. */
+ if (gmask)
+ for (int i = 31; i >= 0; i--)
+ {
+ machine_mode save_mode = SImode;
- frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size);
- frame_size_to_allocate -= cfun->machine->frame_info.pretend_size;
+ if (TARGET_LL64
+ && ((i - 1) % 2 == 0)
+ && ((gmask & (1L << i)) != 0)
+ && ((gmask & (1L << (i - 1))) != 0))
+ {
+ save_mode = DImode;
+ --i;
+ }
+ else if ((gmask & (1L << i)) == 0)
+ continue;
+
+ reg = gen_rtx_REG (save_mode, i);
+ frame_allocated += frame_save_reg (reg, offset);
+ offset = 0;
+ }
+
+ /* Save frame pointer if needed. First save the FP on stack, if not
+ autosaved. Unfortunately, I cannot add it to gmask and use the
+ above loop to save fp because our ABI states fp goes after all
+ registers are saved. */
+ if (save_fp)
+ {
+ frame_allocated += frame_save_reg (frame_pointer_rtx, offset);
+ offset = 0;
}
- /* IRQ using automatic save mechanism will save the register before
- anything we do. */
- if (ARC_AUTO_IRQ_P (fn_type)
- && !ARC_FAST_INTERRUPT_P (fn_type))
+ /* Emit mov fp,sp. */
+ if (arc_frame_pointer_needed ())
+ frame_move (frame_pointer_rtx, stack_pointer_rtx);
+
+ return frame_allocated;
+}
+
+/* ARC's epilogue, restore any required call-saved regs (and call-used
+ if it is for an interrupt handler) using LD/LDD instructions. */
+
+static int
+arc_restore_callee_saves (unsigned int gmask,
+ bool restore_blink,
+ bool restore_fp,
+ HOST_WIDE_INT offset,
+ HOST_WIDE_INT allocated)
+{
+ rtx reg;
+ int frame_deallocated = 0;
+
+ /* Emit mov fp,sp. */
+ if (arc_frame_pointer_needed () && offset)
{
- arc_dwarf_emit_irq_save_regs ();
+ frame_move (stack_pointer_rtx, frame_pointer_rtx);
+ frame_deallocated += offset;
+ offset = 0;
}
- /* The home-grown ABI says link register is saved first. */
- if (arc_must_save_return_addr (cfun)
- && !ARC_AUTOBLINK_IRQ_P (fn_type))
+ if (restore_fp)
{
- rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM);
- rtx mem = gen_frame_mem (Pmode,
- gen_rtx_PRE_DEC (Pmode,
- stack_pointer_rtx));
+ /* Any offset is taken care by previous if-statement. */
+ gcc_assert (offset == 0);
+ frame_deallocated += frame_restore_reg (frame_pointer_rtx, 0);
+ }
- frame_move_inc (mem, ra, stack_pointer_rtx, 0);
- frame_size_to_allocate -= UNITS_PER_WORD;
+ if (offset)
+ {
+ /* No $fp involved, we need to do an add to set the $sp to the
+ location of the first register. */
+ frame_stack_add (offset);
+ frame_deallocated += offset;
+ offset = 0;
}
- /* Save any needed call-saved regs (and call-used if this is an
- interrupt handler) for ARCompact ISA. */
- if (cfun->machine->frame_info.reg_size)
+ /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask. */
+ if (gmask)
+ for (int i = 0; i <= 31; i++)
+ {
+ machine_mode restore_mode = SImode;
+
+ if (TARGET_LL64
+ && ((i % 2) == 0)
+ && ((gmask & (1L << i)) != 0)
+ && ((gmask & (1L << (i + 1))) != 0))
+ restore_mode = DImode;
+ else if ((gmask & (1L << i)) == 0)
+ continue;
+
+ reg = gen_rtx_REG (restore_mode, i);
+ frame_deallocated += frame_restore_reg (reg, 0);
+ offset = 0;
+
+ if (restore_mode == DImode)
+ i++;
+ }
+
+ if (restore_blink)
{
- first_offset = -cfun->machine->frame_info.reg_size;
- /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask. */
- arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset);
- frame_size_to_allocate -= cfun->machine->frame_info.reg_size;
+ reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ frame_deallocated += frame_restore_reg (reg, allocated
+ - frame_deallocated
+ /* Consider as well the
+ current restored
+ register size. */
+ - UNITS_PER_WORD);
}
- /* Save frame pointer if needed. First save the FP on stack, if not
- autosaved. */
- if (arc_frame_pointer_needed ()
- && !ARC_AUTOFP_IRQ_P (fn_type))
+ return frame_deallocated;
+}
+
+/* ARC prologue, save the registers using the enter instruction. The
+ enter instruction can also save $blink (SAVE_BLINK) and $fp
+ (SAVE_FP) registers. */
+
+static int
+arc_save_callee_enter (unsigned int gmask,
+ bool save_blink,
+ bool save_fp,
+ HOST_WIDE_INT offset)
+{
+ int start_reg = ENTER_LEAVE_START_REG;
+ int end_reg = ENTER_LEAVE_END_REG;
+ int regno, indx, off, nregs;
+ rtx insn, reg, mem;
+ int frame_allocated = 0;
+
+ for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
+ regno++;
+
+ end_reg = regno - 1;
+ nregs = end_reg - start_reg + 1;
+ nregs += save_blink ? 1 : 0;
+ nregs += save_fp ? 1 : 0;
+
+ if (offset)
+ frame_stack_add (offset);
+
+ insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + (save_fp ? 1 : 0)
+ + 1));
+ indx = 0;
+
+ reg = gen_rtx_SET (stack_pointer_rtx,
+ plus_constant (Pmode,
+ stack_pointer_rtx,
+ nregs * UNITS_PER_WORD));
+ RTX_FRAME_RELATED_P (reg) = 1;
+ XVECEXP (insn, 0, indx++) = reg;
+ off = nregs * UNITS_PER_WORD;
+
+ if (save_blink)
+ {
+ reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ mem = gen_frame_mem (Pmode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
+ off -= UNITS_PER_WORD;
+ save_blink = false;
+ }
+
+ for (regno = start_reg;
+ regno <= end_reg;
+ regno++, indx++, off -= UNITS_PER_WORD)
+ {
+ reg = gen_rtx_REG (SImode, regno);
+ mem = gen_frame_mem (SImode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
+ gmask = gmask & ~(1L << regno);
+ }
+
+ if (save_fp)
+ {
+ mem = gen_frame_mem (Pmode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, frame_pointer_rtx);
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
+ off -= UNITS_PER_WORD;
+
+ XVECEXP (insn, 0, indx) = gen_rtx_SET (frame_pointer_rtx,
+ stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
+ save_fp = false;
+ }
+
+ gcc_assert (off == 0);
+ insn = frame_insn (insn);
+
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+
+ frame_allocated = nregs * UNITS_PER_WORD;
+
+ /* offset is a negative number, make sure we add it. */
+ return frame_allocated - offset;
+}
+
+/* ARC epilogue, restore the registers using the leave instruction.
+ An initial offset is passed in OFFSET. Besides restoring a register
+ range, leave can also restore $blink (RESTORE_BLINK), or $fp
+ (RESTORE_FP), and can automatically return (RETURN_P). */
+
+static int
+arc_restore_callee_leave (unsigned int gmask,
+ bool restore_blink,
+ bool restore_fp,
+ bool return_p,
+ HOST_WIDE_INT offset)
+{
+ int start_reg = ENTER_LEAVE_START_REG;
+ int end_reg = ENTER_LEAVE_END_REG;
+ int regno, indx, off, nregs;
+ rtx insn, reg, mem;
+ int frame_allocated = 0;
+
+ for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
+ regno++;
+
+ end_reg = regno - 1;
+ nregs = end_reg - start_reg + 1;
+ nregs += restore_blink ? 1 : 0;
+ nregs += restore_fp ? 1 : 0;
+
+ insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1
+ + (return_p ? 1 : 0)));
+ indx = 0;
+
+ if (return_p)
+ XVECEXP (insn, 0, indx++) = ret_rtx;
+
+ if (restore_fp)
+ {
+ /* I cannot emit set (sp, fp) here as cselib expects a single sp
+ set and not two. Thus, use the offset, and change sp adjust
+ value. */
+ frame_allocated += offset;
+ }
+
+ if (offset && !restore_fp)
+ {
+ /* This add is only emitted when we do not restore fp with the
+ leave instruction. */
+ frame_stack_add (offset);
+ frame_allocated += offset;
+ offset = 0;
+ }
+
+ reg = gen_rtx_SET (stack_pointer_rtx,
+ plus_constant (Pmode,
+ stack_pointer_rtx,
+ offset + nregs * UNITS_PER_WORD));
+ RTX_FRAME_RELATED_P (reg) = 1;
+ XVECEXP (insn, 0, indx++) = reg;
+ off = nregs * UNITS_PER_WORD;
+
+ if (restore_blink)
+ {
+ reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ mem = gen_frame_mem (Pmode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ XVECEXP (insn, 0, indx) = gen_rtx_SET (reg, mem);
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
+ off -= UNITS_PER_WORD;
+ }
+
+ for (regno = start_reg;
+ regno <= end_reg;
+ regno++, indx++, off -= UNITS_PER_WORD)
+ {
+ reg = gen_rtx_REG (SImode, regno);
+ mem = gen_frame_mem (SImode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ XVECEXP (insn, 0, indx) = gen_rtx_SET (reg, mem);
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
+ gmask = gmask & ~(1L << regno);
+ }
+
+ if (restore_fp)
+ {
+ mem = gen_frame_mem (Pmode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ XVECEXP (insn, 0, indx) = gen_rtx_SET (frame_pointer_rtx, mem);
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1;
+ off -= UNITS_PER_WORD;
+ }
+
+ gcc_assert (off == 0);
+ if (return_p)
+ {
+ insn = emit_jump_insn (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ insn = frame_insn (insn);
+
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+
+ /* Dwarf related info. */
+ if (restore_fp)
+ {
+ add_reg_note (insn, REG_CFA_RESTORE, frame_pointer_rtx);
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (Pmode, stack_pointer_rtx,
+ offset + nregs * UNITS_PER_WORD));
+ }
+ else
+ {
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ gen_rtx_SET (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+ nregs * UNITS_PER_WORD)));
+ }
+ if (restore_blink)
+ add_reg_note (insn, REG_CFA_RESTORE,
+ gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
+ for (regno = start_reg; regno <= end_reg; regno++)
+ add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (SImode, regno));
+
+ frame_allocated += nregs * UNITS_PER_WORD;
+
+ return frame_allocated;
+}
+
+/* Millicode thunks implementation:
+ Generates calls to millicodes for registers starting from r13 to r25
+ Present Limitations:
+ - Only one range supported. The remaining regs will have the ordinary
+ st and ld instructions for store and loads. Hence a gmask asking
+ to store r13-14, r16-r25 will only generate calls to store and
+ load r13 to r14 while store and load insns will be generated for
+ r16 to r25 in the prologue and epilogue respectively.
+
+ - Presently library only supports register ranges starting from r13.
+*/
+
+static int
+arc_save_callee_milli (unsigned int gmask,
+ bool save_blink,
+ bool save_fp,
+ HOST_WIDE_INT offset,
+ HOST_WIDE_INT reg_size)
+{
+ int start_reg = 13;
+ int end_reg = 25;
+ int regno, indx, off, nregs;
+ rtx insn, reg, mem;
+ int frame_allocated = 0;
+
+ for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
+ regno++;
+
+ end_reg = regno - 1;
+ nregs = end_reg - start_reg + 1;
+ gcc_assert (end_reg > 14);
+
+
+ /* Allocate space on stack for the registers, and take into account
+ also the initial offset. The registers will be saved using
+ offsets. N.B. OFFSET is a negative number. */
+ if (save_blink)
+ {
+ reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ frame_allocated += frame_save_reg (reg, offset);
+ offset = 0;
+ }
+
+ if (reg_size || offset)
+ {
+ frame_stack_add (offset - reg_size);
+ frame_allocated += nregs * UNITS_PER_WORD - offset;
+ offset = 0;
+ }
+
+ /* Start generate millicode call. */
+ insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
+ indx = 0;
+
+ /* This is a call, we clobber blink. */
+ XVECEXP (insn, 0, nregs) =
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
+
+ for (regno = start_reg, indx = 0, off = 0;
+ regno <= end_reg;
+ regno++, indx++, off += UNITS_PER_WORD)
+ {
+ reg = gen_rtx_REG (SImode, regno);
+ mem = gen_frame_mem (SImode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg);
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
+ gmask = gmask & ~(1L << regno);
+ }
+ insn = frame_insn (insn);
+
+ /* Add DWARF info. */
+ for (regno = start_reg, off = 0;
+ regno <= end_reg;
+ regno++, off += UNITS_PER_WORD)
+ {
+ reg = gen_rtx_REG (SImode, regno);
+ mem = gen_rtx_MEM (SImode, plus_constant (Pmode,
+ stack_pointer_rtx, off));
+ add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
+
+ }
+
+ /* In the case of millicode thunk, we need to restore the
+ clobbered blink register. */
+ if (arc_must_save_return_addr (cfun))
{
- rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- GEN_INT (-UNITS_PER_WORD + first_offset));
- rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode,
+ emit_insn (gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
+ gen_rtx_MEM (Pmode,
+ plus_constant (Pmode,
stack_pointer_rtx,
- addr));
- frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0);
- frame_size_to_allocate -= UNITS_PER_WORD;
- first_offset = 0;
+ reg_size))));
}
+ /* Save remaining registers using st instructions. */
+ for (regno = 0; regno <= 31; regno++)
+ {
+ if ((gmask & (1L << regno)) == 0)
+ continue;
+
+ reg = gen_rtx_REG (SImode, regno);
+ mem = gen_frame_mem (SImode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ frame_move_inc (mem, reg, stack_pointer_rtx, 0);
+ frame_allocated += UNITS_PER_WORD;
+ off += UNITS_PER_WORD;
+ }
+
+ /* Save frame pointer if needed. First save the FP on stack, if not
+ autosaved. Unfortunately, I cannot add it to gmask and use the
+ above loop to save fp because our ABI states fp goes after all
+ registers are saved. */
+ if (save_fp)
+ frame_allocated += frame_save_reg (frame_pointer_rtx, offset);
+
/* Emit mov fp,sp. */
if (arc_frame_pointer_needed ())
+ frame_move (frame_pointer_rtx, stack_pointer_rtx);
+
+ return frame_allocated;
+}
+
+/* Like the previous function but restore. */
+
+static int
+arc_restore_callee_milli (unsigned int gmask,
+ bool restore_blink,
+ bool restore_fp,
+ bool return_p,
+ HOST_WIDE_INT offset)
+{
+ int start_reg = 13;
+ int end_reg = 25;
+ int regno, indx, off, nregs;
+ rtx insn, reg, mem;
+ int frame_allocated = 0;
+
+ for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
+ regno++;
+
+ end_reg = regno - 1;
+ nregs = end_reg - start_reg + 1;
+ gcc_assert (end_reg > 14);
+
+ /* Emit mov fp,sp. */
+ if (arc_frame_pointer_needed () && offset)
+ {
+ frame_move (stack_pointer_rtx, frame_pointer_rtx);
+ frame_allocated = offset;
+ offset = 0;
+ }
+
+ if (restore_fp)
+ frame_allocated += frame_restore_reg (frame_pointer_rtx, 0);
+
+ if (offset)
+ {
+ /* No fp involved, hence, we need to adjust the sp via an
+ add. */
+ frame_stack_add (offset);
+ frame_allocated += offset;
+ offset = 0;
+ }
+
+ /* Start generate millicode call. */
+ insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc ((return_p ? 1 : 0)
+ + nregs + 1));
+ indx = 0;
+
+ if (return_p)
+ {
+ /* sibling call, the blink is restored with the help of the
+ value held in r12. */
+ reg = gen_rtx_REG (Pmode, 12);
+ XVECEXP (insn, 0, indx++) = ret_rtx;
+ XVECEXP (insn, 0, indx++) =
+ gen_rtx_SET (stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg));
+ frame_allocated += UNITS_PER_WORD;
+ }
+ else
+ {
+ /* This is a call, we clobber blink. */
+ XVECEXP (insn, 0, nregs) =
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
+ }
+
+ for (regno = start_reg, off = 0;
+ regno <= end_reg;
+ regno++, indx++, off += UNITS_PER_WORD)
+ {
+ reg = gen_rtx_REG (SImode, regno);
+ mem = gen_frame_mem (SImode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ XVECEXP (insn, 0, indx) = gen_rtx_SET (reg, mem);
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1;
+ gmask = gmask & ~(1L << regno);
+ }
+
+ /* Restore remaining registers using LD instructions. */
+ for (regno = 0; regno <= 31; regno++)
+ {
+ if ((gmask & (1L << regno)) == 0)
+ continue;
+
+ reg = gen_rtx_REG (SImode, regno);
+ mem = gen_frame_mem (SImode, plus_constant (Pmode,
+ stack_pointer_rtx,
+ off));
+ rtx tmp = frame_move_inc (reg, mem, stack_pointer_rtx, 0);
+ add_reg_note (tmp, REG_CFA_RESTORE, reg);
+ off += UNITS_PER_WORD;
+ }
+
+ /* Emit millicode call. */
+ if (return_p)
+ {
+ reg = gen_rtx_REG (Pmode, 12);
+ frame_insn (gen_rtx_SET (reg, GEN_INT (off)));
+ frame_allocated += off;
+ insn = emit_jump_insn (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ insn = frame_insn (insn);
+
+ /* Add DWARF info. */
+ for (regno = start_reg; regno <= end_reg; regno++)
+ {
+ reg = gen_rtx_REG (SImode, regno);
+ add_reg_note (insn, REG_CFA_RESTORE, reg);
+
+ }
+
+ if (restore_blink && !return_p)
+ {
+ reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
+ off));
+ insn = frame_insn (gen_rtx_SET (reg, mem));
+ add_reg_note (insn, REG_CFA_RESTORE, reg);
+ }
+
+ return frame_allocated;
+}
+
+/* Set up the stack and frame pointer (if desired) for the function. */
+
+void
+arc_expand_prologue (void)
+{
+ int size;
+ unsigned int gmask = cfun->machine->frame_info.gmask;
+ struct arc_frame_info *frame = &cfun->machine->frame_info;
+ unsigned int frame_size_to_allocate;
+ int first_offset = 0;
+ unsigned int fn_type = arc_compute_function_type (cfun);
+ bool save_blink = false;
+ bool save_fp = false;
+
+ /* Naked functions don't have prologue. */
+ if (ARC_NAKED_P (fn_type))
+ {
+ if (flag_stack_usage_info)
+ current_function_static_stack_size = 0;
+ return;
+ }
+
+ /* Compute total frame size. */
+ size = arc_compute_frame_size ();
+
+ if (flag_stack_usage_info)
+ current_function_static_stack_size = size;
+
+ /* Keep track of frame size to be allocated. */
+ frame_size_to_allocate = size;
+
+ /* These cases shouldn't happen. Catch them now. */
+ gcc_assert (!(size == 0 && gmask));
+
+ /* Allocate space for register arguments if this is a variadic function. */
+ if (frame->pretend_size != 0)
+ first_offset = -frame->pretend_size;
+
+ /* IRQ using automatic save mechanism will save the register before
+ anything we do. */
+ if (ARC_AUTO_IRQ_P (fn_type)
+ && !ARC_FAST_INTERRUPT_P (fn_type))
{
- frame_move (frame_pointer_rtx, stack_pointer_rtx);
+ frame_stack_add (first_offset);
+ first_offset = 0;
+ arc_dwarf_emit_irq_save_regs ();
}
- /* ??? We don't handle the case where the saved regs are more than 252
- bytes away from sp. This can be handled by decrementing sp once, saving
- the regs, and then decrementing it again. The epilogue doesn't have this
- problem as the `ld' insn takes reg+limm values (though it would be more
- efficient to avoid reg+limm). */
+ save_blink = arc_must_save_return_addr (cfun)
+ && !ARC_AUTOBLINK_IRQ_P (fn_type);
+ save_fp = arc_frame_pointer_needed () && !ARC_AUTOFP_IRQ_P (fn_type);
+
+ /* Use enter/leave only for non-interrupt functions. */
+ if (TARGET_CODE_DENSITY
+ && TARGET_CODE_DENSITY_FRAME
+ && !ARC_AUTOFP_IRQ_P (fn_type)
+ && !ARC_AUTOBLINK_IRQ_P (fn_type)
+ && !ARC_INTERRUPT_P (fn_type)
+ && arc_enter_leave_p (gmask))
+ frame_size_to_allocate -= arc_save_callee_enter (gmask, save_blink,
+ save_fp,
+ first_offset);
+ else if (frame->millicode_end_reg > 14)
+ frame_size_to_allocate -= arc_save_callee_milli (gmask, save_blink,
+ save_fp,
+ first_offset,
+ frame->reg_size);
+ else
+ frame_size_to_allocate -= arc_save_callee_saves (gmask, save_blink, save_fp,
+ first_offset);
- frame_size_to_allocate -= first_offset;
/* Allocate the stack frame. */
if (frame_size_to_allocate > 0)
{
will prevent the scheduler from moving stores to the frame
before the stack adjustment. */
if (arc_frame_pointer_needed ())
- emit_insn (gen_stack_tie (stack_pointer_rtx,
- hard_frame_pointer_rtx));
+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
}
-
- /* Setup the gp register, if needed. */
- if (crtl->uses_pic_offset_table)
- arc_finalize_pic ();
}
/* Do any necessary cleanup after a function to restore stack, frame,
void
arc_expand_epilogue (int sibcall_p)
{
- int size = get_frame_size ();
+ int size;
unsigned int fn_type = arc_compute_function_type (cfun);
-
- size = ARC_STACK_ALIGN (size);
- size = (!cfun->machine->frame_info.initialized
- ? arc_compute_frame_size (size)
- : cfun->machine->frame_info.total_size);
-
- unsigned int pretend_size = cfun->machine->frame_info.pretend_size;
- unsigned int frame_size;
unsigned int size_to_deallocate;
int restored;
int can_trust_sp_p = !cfun->calls_alloca;
- int first_offset = 0;
- int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
- rtx insn;
+ int first_offset;
+ bool restore_fp = arc_frame_pointer_needed () && !ARC_AUTOFP_IRQ_P (fn_type);
+ bool restore_blink = arc_must_save_return_addr (cfun)
+ && !ARC_AUTOBLINK_IRQ_P (fn_type);
+ unsigned int gmask = cfun->machine->frame_info.gmask;
+ bool return_p = !sibcall_p && fn_type == ARC_FUNCTION_NORMAL
+ && !cfun->machine->frame_info.pretend_size;
+ struct arc_frame_info *frame = &cfun->machine->frame_info;
+
/* Naked functions don't have epilogue. */
if (ARC_NAKED_P (fn_type))
return;
+ size = arc_compute_frame_size ();
size_to_deallocate = size;
- frame_size = size - (pretend_size +
- cfun->machine->frame_info.reg_size +
- cfun->machine->frame_info.extra_size);
-
- /* ??? There are lots of optimizations that can be done here.
- EG: Use fp to restore regs if it's closer.
- Maybe in time we'll do them all. For now, always restore regs from
- sp, but don't restore sp if we don't have to. */
+ first_offset = size - (frame->pretend_size + frame->reg_size
+ + frame->extra_size);
if (!can_trust_sp_p)
gcc_assert (arc_frame_pointer_needed ());
- /* Restore stack pointer to the beginning of saved register area for
- ARCompact ISA. */
- if (frame_size)
+ if (TARGET_CODE_DENSITY
+ && TARGET_CODE_DENSITY_FRAME
+ && !ARC_AUTOFP_IRQ_P (fn_type)
+ && !ARC_AUTOBLINK_IRQ_P (fn_type)
+ && !ARC_INTERRUPT_P (fn_type)
+ && arc_enter_leave_p (gmask))
+ {
+ /* Using leave instruction. */
+ size_to_deallocate -= arc_restore_callee_leave (gmask, restore_blink,
+ restore_fp,
+ return_p,
+ first_offset);
+ if (return_p)
+ {
+ gcc_assert (size_to_deallocate == 0);
+ return;
+ }
+ }
+ else if (frame->millicode_end_reg > 14)
{
- if (arc_frame_pointer_needed ())
- frame_move (stack_pointer_rtx, frame_pointer_rtx);
- else
- first_offset = frame_size;
- size_to_deallocate -= frame_size;
+ /* Using millicode calls. */
+ size_to_deallocate -= arc_restore_callee_milli (gmask, restore_blink,
+ restore_fp,
+ return_p,
+ first_offset);
+ if (return_p)
+ {
+ gcc_assert (size_to_deallocate == 0);
+ return;
+ }
}
- else if (!can_trust_sp_p)
- frame_stack_add (-frame_size);
+ else
+ size_to_deallocate -= arc_restore_callee_saves (gmask, restore_blink,
+ restore_fp,
+ first_offset,
+ size_to_deallocate);
+ /* Keep track of how much of the stack pointer we've restored. It
+ makes the following a lot more readable. */
+ restored = size - size_to_deallocate;
- /* Restore any saved registers. */
- if (arc_frame_pointer_needed ()
- && !ARC_AUTOFP_IRQ_P (fn_type))
- {
- rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
+ if (size > restored)
+ frame_stack_add (size - restored);
- insn = frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
- stack_pointer_rtx, 0);
- add_reg_note (insn, REG_CFA_RESTORE, frame_pointer_rtx);
- add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (SImode, stack_pointer_rtx,
- 4));
- size_to_deallocate -= UNITS_PER_WORD;
- }
+ /* For frames that use __builtin_eh_return, the register defined by
+ EH_RETURN_STACKADJ_RTX is set to 0 for all standard return paths.
+ On eh_return paths however, the register is set to the value that
+ should be added to the stack pointer in order to restore the
+ correct stack pointer for the exception handling frame.
- /* Load blink after the calls to thunk calls in case of optimize size. */
- if (millicode_p)
- {
- int sibthunk_p = (!sibcall_p
- && fn_type == ARC_FUNCTION_NORMAL
- && !cfun->machine->frame_info.pretend_size);
+ For ARC we are going to use r2 for EH_RETURN_STACKADJ_RTX, add
+ this onto the stack for eh_return frames. */
+ if (crtl->calls_eh_return)
+ emit_insn (gen_add2_insn (stack_pointer_rtx,
+ EH_RETURN_STACKADJ_RTX));
- gcc_assert (!(cfun->machine->frame_info.gmask
- & (FRAME_POINTER_MASK | RETURN_ADDR_MASK)));
- arc_save_restore (stack_pointer_rtx,
- cfun->machine->frame_info.gmask,
- 1 + sibthunk_p, &first_offset);
- if (sibthunk_p)
- return;
- }
- /* If we are to restore registers, and first_offset would require
- a limm to be encoded in a PRE_MODIFY, yet we can add it with a
- fast add to the stack pointer, do this now. */
- if ((!SMALL_INT (first_offset)
- && cfun->machine->frame_info.gmask
- && ((TARGET_ARC700 && !optimize_size)
- ? first_offset <= 0x800
- : satisfies_constraint_C2a (GEN_INT (first_offset))))
- /* Also do this if we have both gprs and return
- address to restore, and they both would need a LIMM. */
- || (arc_must_save_return_addr (cfun)
- && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2)
- && cfun->machine->frame_info.gmask))
- {
- frame_stack_add (first_offset);
- first_offset = 0;
- }
- if (arc_must_save_return_addr (cfun)
- && !ARC_AUTOBLINK_IRQ_P (fn_type))
- {
- rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
- int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
- rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs);
- HOST_WIDE_INT cfa_adjust = 0;
+ /* Emit the return instruction. */
+ if (sibcall_p == FALSE)
+ emit_jump_insn (gen_simple_return ());
+}
- /* If the load of blink would need a LIMM, but we can add
- the offset quickly to sp, do the latter. */
- if (!SMALL_INT (ra_offs >> 2)
- && !cfun->machine->frame_info.gmask
- && ((TARGET_ARC700 && !optimize_size)
- ? ra_offs <= 0x800
- : satisfies_constraint_C2a (GEN_INT (ra_offs))))
- {
- size_to_deallocate -= ra_offs - first_offset;
- first_offset = 0;
- frame_stack_add (ra_offs);
- ra_offs = 0;
- addr = stack_pointer_rtx;
- }
- /* See if we can combine the load of the return address with the
- final stack adjustment.
- We need a separate load if there are still registers to
- restore. We also want a separate load if the combined insn
- would need a limm, but a separate load doesn't. */
- if (ra_offs
- && !cfun->machine->frame_info.gmask
- && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
- {
- addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
- cfa_adjust = ra_offs;
- first_offset = 0;
- size_to_deallocate -= cfun->machine->frame_info.reg_size;
- }
- else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
- {
- addr = gen_rtx_POST_INC (Pmode, addr);
- cfa_adjust = GET_MODE_SIZE (Pmode);
- size_to_deallocate = 0;
- }
+/* Helper for {push/pop}_multi_operand: check if rtx OP is a suitable
+ construct to match either enter or leave instruction. Which one
+ is selected by the PUSH_P argument. */
- insn = frame_move_inc (ra, gen_frame_mem (Pmode, addr),
- stack_pointer_rtx, addr);
- if (cfa_adjust)
- {
- enum reg_note note = REG_CFA_ADJUST_CFA;
+bool
+arc_check_multi (rtx op, bool push_p)
+{
+ HOST_WIDE_INT len = XVECLEN (op, 0);
+ unsigned int regno, i, start;
+ unsigned int memp = push_p ? 0 : 1;
+ rtx elt;
- add_reg_note (insn, note,
- gen_rtx_SET (stack_pointer_rtx,
- plus_constant (SImode, stack_pointer_rtx,
- cfa_adjust)));
- }
- add_reg_note (insn, REG_CFA_RESTORE, ra);
- }
+ if (len <= 1)
+ return false;
+
+ start = 1;
+ elt = XVECEXP (op, 0, 0);
+ if (!push_p && GET_CODE (elt) == RETURN)
+ start = 2;
- if (!millicode_p)
+ for (i = start, regno = ENTER_LEAVE_START_REG; i < len; i++, regno++)
{
- if (cfun->machine->frame_info.reg_size)
- arc_save_restore (stack_pointer_rtx,
- /* The zeroing of these two bits is unnecessary, but leave this in for clarity. */
- cfun->machine->frame_info.gmask
- & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset);
- }
+ rtx elt = XVECEXP (op, 0, i);
+ rtx reg, mem, addr;
- /* The rest of this function does the following:
- ARCompact : handle epilogue_delay, restore sp (phase-2), return
- */
+ if (GET_CODE (elt) != SET)
+ return false;
+ mem = XEXP (elt, memp);
+ reg = XEXP (elt, 1 - memp);
- /* Keep track of how much of the stack pointer we've restored.
- It makes the following a lot more readable. */
- size_to_deallocate += first_offset;
- restored = size - size_to_deallocate;
+ if (!REG_P (reg)
+ || !MEM_P (mem))
+ return false;
- if (size > restored)
- frame_stack_add (size - restored);
+ /* Check for blink. */
+ if (REGNO (reg) == RETURN_ADDR_REGNUM
+ && i == start)
+ regno = 12;
+ else if (REGNO (reg) == FRAME_POINTER_REGNUM)
+ ++i;
+ else if (REGNO (reg) != regno)
+ return false;
- /* Emit the return instruction. */
- if (sibcall_p == FALSE)
- emit_jump_insn (gen_simple_return ());
+ addr = XEXP (mem, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ if (!rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
+ || !CONST_INT_P (XEXP (addr, 1)))
+ return false;
+ }
+ else
+ {
+ if (!rtx_equal_p (stack_pointer_rtx, addr))
+ return false;
+ }
+ }
+ return true;
}
-/* Return the offset relative to the stack pointer where the return address
- is stored, or -1 if it is not stored. */
+/* Return rtx for the location of the return address on the stack,
+ suitable for use in __builtin_eh_return. The new return address
+ will be written to this location in order to redirect the return to
+ the exception handler. Our ABI says the blink is pushed first on
+ stack followed by an unknown number of register saves, and finally
+ by fp. Hence we cannot use the EH_RETURN_ADDRESS macro as the
+ stack is not finalized. */
-int
-arc_return_slot_offset ()
-{
- struct arc_frame_info *afi = &cfun->machine->frame_info;
-
- return (afi->save_return_addr
- ? afi->total_size - afi->pretend_size - afi->extra_size : -1);
+void
+arc_eh_return_address_location (rtx source)
+{
+ rtx mem;
+ int offset;
+ struct arc_frame_info *afi;
+
+ arc_compute_frame_size ();
+ afi = &cfun->machine->frame_info;
+
+ gcc_assert (crtl->calls_eh_return);
+ gcc_assert (afi->save_return_addr);
+ gcc_assert (afi->extra_size >= 4);
+
+ /* The '-4' removes the size of the return address, which is
+ included in the 'extra_size' field. */
+ offset = afi->reg_size + afi->extra_size - 4;
+ mem = gen_frame_mem (Pmode,
+ plus_constant (Pmode, frame_pointer_rtx, offset));
+
+ /* The following should not be needed, and is really a hack. The
+ issue being worked around here is that the DSE (Dead Store
+ Elimination) pass will remove this write to the stack as it sees
+ a single store and no corresponding read. The read however
+ occurs in the epilogue code, which is not added into the function
+ rtl until a later pass. So, at the time of DSE, the decision to
+ remove this store seems perfectly sensible. Marking the memory
+ address as volatile obviously has the effect of preventing DSE
+ from removing the store. */
+ MEM_VOLATILE_P (mem) = true;
+ emit_move_insn (mem, source);
}
/* PIC */
unspec));
}
-/* Emit special PIC prologues and epilogues. */
-/* If the function has any GOTOFF relocations, then the GOTBASE
- register has to be setup in the prologue
- The instruction needed at the function start for setting up the
- GOTBASE register is
- add rdest, pc,
- ----------------------------------------------------------
- The rtl to be emitted for this should be:
- set (reg basereg)
- (plus (reg pc)
- (const (unspec (symref _DYNAMIC) 3)))
- ---------------------------------------------------------- */
-
-static void
-arc_finalize_pic (void)
-{
- rtx pat;
- rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
-
- if (crtl->uses_pic_offset_table == 0)
- return;
-
- gcc_assert (flag_pic != 0);
-
- pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC");
- pat = arc_unspec_offset (pat, ARC_UNSPEC_GOT);
- pat = gen_rtx_SET (baseptr_rtx, pat);
-
- emit_insn (pat);
-}
-\f
/* !TARGET_BARREL_SHIFTER support. */
/* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what
kind of shift. */
\f
/* Nested function support. */
-/* Directly store VALUE into memory object BLOCK at OFFSET. */
+/* Output assembler code for a block containing the constant parts of
+ a trampoline, leaving space for variable parts. A trampoline looks
+ like this:
+
+ ld_s r12,[pcl,8]
+ ld r11,[pcl,12]
+ j_s [r12]
+ .word function's address
+ .word static chain value
+
+*/
static void
-emit_store_direct (rtx block, int offset, int value)
+arc_asm_trampoline_template (FILE *f)
{
- emit_insn (gen_store_direct (adjust_address (block, SImode, offset),
- force_reg (SImode,
- gen_int_mode (value, SImode))));
+ asm_fprintf (f, "\tld_s\t%s,[pcl,8]\n", ARC_TEMP_SCRATCH_REG);
+ asm_fprintf (f, "\tld\t%s,[pcl,12]\n", reg_names[STATIC_CHAIN_REGNUM]);
+ asm_fprintf (f, "\tj_s\t[%s]\n", ARC_TEMP_SCRATCH_REG);
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
- FNADDR is an RTX for the address of the function's pure code.
- CXT is an RTX for the static chain value for the function. */
-/* With potentially multiple shared objects loaded, and multiple stacks
- present for multiple thereds where trampolines might reside, a simple
- range check will likely not suffice for the profiler to tell if a callee
- is a trampoline. We a speedier check by making the trampoline start at
- an address that is not 4-byte aligned.
- A trampoline looks like this:
-
- nop_s 0x78e0
-entry:
- ld_s r12,[pcl,12] 0xd403
- ld r11,[pcl,12] 0x170c 700b
- j_s [r12] 0x7c00
- nop_s 0x78e0
+ FNADDR is an RTX for the address of the function's pure code. CXT
+ is an RTX for the static chain value for the function.
The fastest trampoline to execute for trampolines within +-8KB of CTX
would be:
+
add2 r11,pcl,s12
j [limm] 0x20200f80 limm
- and that would also be faster to write to the stack by computing the offset
- from CTX to TRAMP at compile time. However, it would really be better to
- get rid of the high cost of cache invalidation when generating trampolines,
- which requires that the code part of trampolines stays constant, and
- additionally either
- - making sure that no executable code but trampolines is on the stack,
- no icache entries linger for the area of the stack from when before the
- stack was allocated, and allocating trampolines in trampoline-only
- cache lines
- or
- - allocate trampolines fram a special pool of pre-allocated trampolines. */
+
+ and that would also be faster to write to the stack by computing
+ the offset from CTX to TRAMP at compile time. However, it would
+ really be better to get rid of the high cost of cache invalidation
+ when generating trampolines, which requires that the code part of
+ trampolines stays constant, and additionally either making sure
+ that no executable code but trampolines is on the stack, no icache
+ entries linger for the area of the stack from when before the stack
+ was allocated, and allocating trampolines in trampoline-only cache
+ lines or allocate trampolines from a special pool of pre-allocated
+ trampolines. */
static void
arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
{
rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
- emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0);
- emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c);
- emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00);
- emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr);
- emit_move_insn (adjust_address (tramp, SImode, 16), cxt);
- emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0)));
+ emit_block_move (tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+ emit_move_insn (adjust_address (tramp, SImode, 8), fnaddr);
+ emit_move_insn (adjust_address (tramp, SImode, 12), cxt);
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
+ LCT_NORMAL, VOIDmode, XEXP (tramp, 0), Pmode,
+ plus_constant (Pmode, XEXP (tramp, 0), TRAMPOLINE_SIZE),
+ Pmode);
}
-/* Allow the profiler to easily distinguish trampolines from normal
- functions. */
+/* Add the given function declaration to emit code in JLI section. */
-static rtx
-arc_trampoline_adjust_address (rtx addr)
+static void
+arc_add_jli_section (rtx pat)
{
- return plus_constant (Pmode, addr, 2);
+ const char *name;
+ tree attrs;
+ arc_jli_section *sec = arc_jli_sections, *new_section;
+ tree decl = SYMBOL_REF_DECL (pat);
+
+ if (!pat)
+ return;
+
+ if (decl)
+ {
+ /* For fixed locations do not generate the jli table entry. It
+ should be provided by the user as an asm file. */
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("jli_fixed", attrs))
+ return;
+ }
+
+ name = XSTR (pat, 0);
+
+ /* Don't insert the same symbol twice. */
+ while (sec != NULL)
+ {
+ if(strcmp (name, sec->name) == 0)
+ return;
+ sec = sec->next;
+ }
+
+ /* New name, insert it. */
+ new_section = (arc_jli_section *) xmalloc (sizeof (arc_jli_section));
+ gcc_assert (new_section != NULL);
+ new_section->name = name;
+ new_section->next = arc_jli_sections;
+ arc_jli_sections = new_section;
}
/* This is set briefly to 1 when we output a ".as" address modifer, and then
reset when we output the scaled address. */
static int output_scaled = 0;
+/* Set when we force sdata output. */
+static int output_sdata = 0;
+
/* Print operand X (an rtx) in assembler syntax to file FILE.
CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
For `%' followed by punctuation, CODE is the punctuation and X is null. */
'd'
'D'
'R': Second word
- 'S'
+ 'S': JLI instruction
+ 'j': used by mov instruction to properly emit jli related labels.
'B': Branch comparison operand - suppress sda reference
'H': Most significant word
'L': Least significant word
case 'c':
if (GET_CODE (x) == CONST_INT)
- fprintf (file, "%d", INTVAL (x) );
+ fprintf (file, "%ld", INTVAL (x) );
else
output_operand_lossage ("invalid operands to %%c code");
else
output_operand_lossage ("invalid operand to %%R code");
return;
+ case 'j':
case 'S' :
- /* FIXME: remove %S option. */
- break;
+ if (GET_CODE (x) == SYMBOL_REF
+ && arc_is_jli_call_p (x))
+ {
+ if (SYMBOL_REF_DECL (x))
+ {
+ tree attrs = (TREE_TYPE (SYMBOL_REF_DECL (x)) != error_mark_node
+ ? TYPE_ATTRIBUTES (TREE_TYPE (SYMBOL_REF_DECL (x)))
+ : NULL_TREE);
+ if (lookup_attribute ("jli_fixed", attrs))
+ {
+ /* No special treatment for jli_fixed functions. */
+ if (code == 'j')
+ break;
+ fprintf (file, "%ld\t; @",
+ TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attrs))));
+ assemble_name (file, XSTR (x, 0));
+ return;
+ }
+ }
+ fprintf (file, "@__jli.");
+ assemble_name (file, XSTR (x, 0));
+ if (code == 'j')
+ arc_add_jli_section (x);
+ return;
+ }
+ if (GET_CODE (x) == SYMBOL_REF
+ && arc_is_secure_call_p (x))
+ {
+ /* No special treatment for secure functions. */
+ if (code == 'j' )
+ break;
+ tree attrs = (TREE_TYPE (SYMBOL_REF_DECL (x)) != error_mark_node
+ ? TYPE_ATTRIBUTES (TREE_TYPE (SYMBOL_REF_DECL (x)))
+ : NULL_TREE);
+ fprintf (file, "%ld\t; @",
+ TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attrs))));
+ assemble_name (file, XSTR (x, 0));
+ return;
+ }
+ break;
case 'B' /* Branch or other LIMM ref - must not use sda references. */ :
if (CONSTANT_P (x))
{
fputs (".as", file);
output_scaled = 1;
}
- else if (LEGITIMATE_SMALL_DATA_ADDRESS_P (addr)
- && GET_MODE_SIZE (GET_MODE (x)) > 1)
+ break;
+ case SYMBOL_REF:
+ case CONST:
+ if (legitimate_small_data_address_p (addr)
+ && GET_MODE_SIZE (GET_MODE (x)) > 1)
{
- tree decl = NULL_TREE;
- int align = 0;
- if (GET_CODE (XEXP (addr, 1)) == SYMBOL_REF)
- decl = SYMBOL_REF_DECL (XEXP (addr, 1));
- else if (GET_CODE (XEXP (XEXP (XEXP (addr, 1), 0), 0))
- == SYMBOL_REF)
- decl = SYMBOL_REF_DECL (XEXP (XEXP (XEXP (addr, 1), 0), 0));
- if (decl)
- align = DECL_ALIGN (decl);
- align = align / BITS_PER_UNIT;
- if ((GET_MODE_SIZE (GET_MODE (x)) == 2)
- && align && ((align & 1) == 0))
- fputs (".as", file);
- if ((GET_MODE_SIZE (GET_MODE (x)) >= 4)
- && align && ((align & 3) == 0))
+ int align = get_symbol_alignment (addr);
+ int mask = 0;
+ switch (GET_MODE (x))
+ {
+ case E_HImode:
+ mask = 1;
+ break;
+ default:
+ mask = 3;
+ break;
+ }
+ if (align && ((align & mask) == 0))
fputs (".as", file);
}
break;
refs are defined to use the cache bypass mechanism. */
if (GET_CODE (x) == MEM)
{
- if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET )
+ if ((MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET)
+ || arc_is_uncached_mem_p (x))
fputs (".di", file);
}
else
}
break;
case '&':
- if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
+ if (TARGET_ANNOTATE_ALIGN)
fprintf (file, "; unalign: %d", cfun->machine->unalign);
return;
case '+':
rtx addr = XEXP (x, 0);
int size = GET_MODE_SIZE (GET_MODE (x));
+ if (legitimate_small_data_address_p (addr))
+ output_sdata = 1;
+
fputc ('[', file);
switch (GET_CODE (addr))
|| XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_TLS_GD)))
arc_output_pic_addr_const (file, x, code);
else
- {
- /* FIXME: Dirty way to handle @var@sda+const. Shd be handled
- with asm_output_symbol_ref */
- if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
- {
- x = XEXP (x, 0);
- output_addr_const (file, XEXP (x, 0));
- if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0)))
- fprintf (file, "@sda");
-
- if (GET_CODE (XEXP (x, 1)) != CONST_INT
- || INTVAL (XEXP (x, 1)) >= 0)
- fprintf (file, "+");
- output_addr_const (file, XEXP (x, 1));
- }
- else
- output_addr_const (file, x);
- }
- if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
- fprintf (file, "@sda");
+ output_addr_const (file, x);
break;
}
}
case REG :
fputs (reg_names[REGNO (addr)], file);
break;
- case SYMBOL_REF :
+ case SYMBOL_REF:
+ if (output_sdata)
+ fputs ("gp,", file);
output_addr_const (file, addr);
- if (SYMBOL_REF_SMALL_P (addr))
- fprintf (file, "@sda");
+ if (output_sdata)
+ fputs ("@sda", file);
+ output_sdata = 0;
break;
case PLUS :
if (GET_CODE (XEXP (addr, 0)) == MULT)
&& ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current));
}
-/* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC,
- and look inside SEQUENCEs. */
-
-static rtx_insn *
-arc_next_active_insn (rtx_insn *insn, struct arc_ccfsm *statep)
-{
- rtx pat;
-
- do
- {
- if (statep)
- arc_ccfsm_post_advance (insn, statep);
- insn = NEXT_INSN (insn);
- if (!insn || BARRIER_P (insn))
- return NULL;
- if (statep)
- arc_ccfsm_advance (insn, statep);
- }
- while (NOTE_P (insn)
- || (cfun->machine->arc_reorg_started
- && LABEL_P (insn) && !label_to_alignment (insn))
- || (NONJUMP_INSN_P (insn)
- && (GET_CODE (PATTERN (insn)) == USE
- || GET_CODE (PATTERN (insn)) == CLOBBER)));
- if (!LABEL_P (insn))
- {
- gcc_assert (INSN_P (insn));
- pat = PATTERN (insn);
- if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
- return NULL;
- if (GET_CODE (pat) == SEQUENCE)
- return as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
- }
- return insn;
-}
-
/* When deciding if an insn should be output short, we want to know something
about the following insns:
- if another insn follows which we know we can output as a short insn
zero if the current insn is aligned to a 4-byte-boundary, two otherwise.
If CHECK_ATTR is greater than 0, check the iscompact attribute first. */
-int
+static int
arc_verify_short (rtx_insn *insn, int, int check_attr)
{
enum attr_iscompact iscompact;
- struct machine_function *machine;
if (check_attr > 0)
{
if (iscompact == ISCOMPACT_FALSE)
return 0;
}
- machine = cfun->machine;
-
- if (machine->force_short_suffix >= 0)
- return machine->force_short_suffix;
return (get_attr_length (insn) & 2) != 0;
}
if (TARGET_DUMPISIZE)
fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
- /* Output a nop if necessary to prevent a hazard.
- Don't do this for delay slots: inserting a nop would
- alter semantics, and the only time we would find a hazard is for a
- call function result - and in that case, the hazard is spurious to
- start with. */
- if (PREV_INSN (insn)
- && PREV_INSN (NEXT_INSN (insn)) == insn
- && arc_hazard (prev_real_insn (insn), insn))
- {
- current_output_insn =
- emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn)));
- final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL);
- current_output_insn = insn;
- }
- /* Restore extraction data which might have been clobbered by arc_hazard. */
- extract_constrain_insn_cached (insn);
-
if (!cfun->machine->prescan_initialized)
{
/* Clear lingering state from branch shortening. */
cfun->machine->prescan_initialized = 1;
}
arc_ccfsm_advance (insn, &arc_ccfsm_current);
-
- cfun->machine->size_reason = 0;
}
/* Given FROM and TO register numbers, say whether this elimination is allowed.
int
arc_initial_elimination_offset (int from, int to)
{
- if (! cfun->machine->frame_info.initialized)
- arc_compute_frame_size (get_frame_size ());
+ if (!cfun->machine->frame_info.initialized)
+ arc_compute_frame_size ();
if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
{
static bool
arc_frame_pointer_required (void)
{
- return cfun->calls_alloca;
+ return cfun->calls_alloca || crtl->calls_eh_return;
}
/* Return the destination address of a branch. */
-int
+static int
branch_dest (rtx branch)
{
rtx pat = PATTERN (branch);
{
default_file_start ();
fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
+
+ /* Set some build attributes that we want to have. */
+ asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_PCS_config, %d\n",
+ ATTRIBUTE_PCS);
+ asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_rf16, %d\n",
+ TARGET_RF16 ? 1 : 0);
+ asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_pic, %d\n",
+ flag_pic ? 2 : 0);
+ asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_tls, %d\n",
+ (arc_tp_regno != -1) ? 1 : 0);
+ asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_sda, %d\n",
+ TARGET_NO_SDATA_SET ? 0 : 2);
+ asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_ABI_exceptions, %d\n",
+ TARGET_OPTFPE ? 1 : 0);
+ if (TARGET_V2)
+ asm_fprintf (asm_out_file, "\t.arc_attribute Tag_ARC_CPU_variation, %d\n",
+ (arc_tune < ARC_TUNE_CORE_3) ? 2 :
+ (arc_tune == ARC_TUNE_CORE_3 ? 3 : 4));
+}
+
+/* Implement `TARGET_ASM_FILE_END'. */
+/* Outputs JLI-related text to the stdio stream FILE. */
+
+void arc_file_end (void)
+{
+ arc_jli_section *sec = arc_jli_sections;
+
+ while (sec != NULL)
+ {
+ fprintf (asm_out_file, "\n");
+ fprintf (asm_out_file, "# JLI entry for function ");
+ assemble_name (asm_out_file, sec->name);
+ fprintf (asm_out_file, "\n\t.section .jlitab, \"axG\", @progbits, "
+ ".jlitab.");
+ assemble_name (asm_out_file, sec->name);
+ fprintf (asm_out_file,", comdat\n");
+
+ fprintf (asm_out_file, "\t.align\t4\n");
+ fprintf (asm_out_file, "__jli.");
+ assemble_name (asm_out_file, sec->name);
+ fprintf (asm_out_file, ":\n\t.weak __jli.");
+ assemble_name (asm_out_file, sec->name);
+ fprintf (asm_out_file, "\n\tb\t@");
+ assemble_name (asm_out_file, sec->name);
+ fprintf (asm_out_file, "\n");
+ sec = sec->next;
+ }
+ file_end_indicate_exec_stack ();
}
/* Cost functions. */
return false;
}
-/* Get the thread pointer. */
-
-static rtx
-arc_get_tp (void)
-{
- /* If arc_tp_regno has been set, we can use that hard register
- directly as a base register. */
- if (arc_tp_regno != -1)
- return gen_rtx_REG (Pmode, arc_tp_regno);
-
- /* Otherwise, call __read_tp. Copy the result to a pseudo to avoid
- conflicts with function arguments / results. */
- rtx reg = gen_reg_rtx (Pmode);
- emit_insn (gen_tls_load_tp_soft ());
- emit_move_insn (reg, gen_rtx_REG (Pmode, R0_REG));
- return reg;
-}
+/* The __tls_get_attr symbol. */
+static GTY(()) rtx arc_tls_symbol;
-/* Helper to be used by TLS Global dynamic model. */
+/* Emit a call to __tls_get_addr. TI is the argument to this function.
+ RET is an RTX for the return value location. The entire insn sequence
+ is returned. */
static rtx
-arc_emit_call_tls_get_addr (rtx sym, int reloc, rtx eqv)
+arc_call_tls_get_addr (rtx ti)
{
- rtx r0 = gen_rtx_REG (Pmode, R0_REG);
- rtx call_fusage = NULL_RTX;
-
- start_sequence ();
-
- rtx x = arc_unspec_offset (sym, reloc);
- emit_move_insn (r0, x);
- use_reg (&call_fusage, r0);
+ rtx arg = gen_rtx_REG (Pmode, R0_REG);
+ rtx ret = gen_rtx_REG (Pmode, R0_REG);
+ rtx fn;
+ rtx_insn *insn;
- gcc_assert (reloc == UNSPEC_TLS_GD);
- rtx call_insn = emit_call_insn (gen_tls_gd_get_addr (sym));
- /* Should we set RTL_CONST_CALL_P? We read memory, but not in a
- way that the application should care. */
- RTL_PURE_CALL_P (call_insn) = 1;
- add_function_usage_to (call_insn, call_fusage);
+ if (!arc_tls_symbol)
+ arc_tls_symbol = init_one_libfunc ("__tls_get_addr");
- rtx_insn *insns = get_insns ();
- end_sequence ();
+ emit_move_insn (arg, ti);
+ fn = gen_rtx_MEM (SImode, arc_tls_symbol);
+ insn = emit_call_insn (gen_call_value (ret, fn, const0_rtx));
+ RTL_CONST_CALL_P (insn) = 1;
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), ret);
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), arg);
- rtx dest = gen_reg_rtx (Pmode);
- emit_libcall_block (insns, dest, r0, eqv);
- return dest;
+ return ret;
}
#define DTPOFF_ZERO_SYM ".tdata"
static rtx
arc_legitimize_tls_address (rtx addr, enum tls_model model)
{
+ rtx tmp;
+
if (!flag_pic && model == TLS_MODEL_LOCAL_DYNAMIC)
model = TLS_MODEL_LOCAL_EXEC;
+
+ /* The TP pointer needs to be set. */
+ gcc_assert (arc_tp_regno != -1);
+
switch (model)
{
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ tmp = gen_reg_rtx (Pmode);
+ emit_move_insn (tmp, arc_unspec_offset (addr, UNSPEC_TLS_GD));
+ return arc_call_tls_get_addr (tmp);
+
case TLS_MODEL_LOCAL_DYNAMIC:
rtx base;
tree decl;
const char *base_name;
- rtvec v;
decl = SYMBOL_REF_DECL (addr);
base_name = DTPOFF_ZERO_SYM;
base_name = ".tbss";
base = gen_rtx_SYMBOL_REF (Pmode, base_name);
- if (strcmp (base_name, DTPOFF_ZERO_SYM) == 0)
- {
- if (!flag_pic)
- goto local_exec;
- v = gen_rtvec (1, addr);
- }
- else
- v = gen_rtvec (2, addr, base);
- addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_TLS_OFF);
- addr = gen_rtx_CONST (Pmode, addr);
- base = arc_legitimize_tls_address (base, TLS_MODEL_GLOBAL_DYNAMIC);
- return gen_rtx_PLUS (Pmode, force_reg (Pmode, base), addr);
-
- case TLS_MODEL_GLOBAL_DYNAMIC:
- return arc_emit_call_tls_get_addr (addr, UNSPEC_TLS_GD, addr);
+ tmp = gen_reg_rtx (Pmode);
+ emit_move_insn (tmp, arc_unspec_offset (base, UNSPEC_TLS_GD));
+ base = arc_call_tls_get_addr (tmp);
+ return gen_rtx_PLUS (Pmode, force_reg (Pmode, base),
+ arc_unspec_offset (addr, UNSPEC_TLS_OFF));
case TLS_MODEL_INITIAL_EXEC:
addr = arc_unspec_offset (addr, UNSPEC_TLS_IE);
addr = copy_to_mode_reg (Pmode, gen_const_mem (Pmode, addr));
- return gen_rtx_PLUS (Pmode, arc_get_tp (), addr);
+ return gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, arc_tp_regno), addr);
case TLS_MODEL_LOCAL_EXEC:
- local_exec:
addr = arc_unspec_offset (addr, UNSPEC_TLS_OFF);
- return gen_rtx_PLUS (Pmode, arc_get_tp (), addr);
+ return gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, arc_tp_regno), addr);
+
default:
gcc_unreachable ();
}
bool
arc_legitimate_constant_p (machine_mode mode, rtx x)
{
- if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
- return false;
-
- if (!flag_pic && mode != Pmode)
- return true;
-
switch (GET_CODE (x))
{
case CONST:
return true;
if (legitimate_scaled_address_p (mode, x, strict))
return true;
- if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x))
+ if (legitimate_small_data_address_p (x))
return true;
if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x)))
return true;
/* If we can't fold the alignment to a constant integer
whilst optimizing, this is probably a user error. */
if (optimize)
- warning (0, "__builtin_arc_aligned with non-constant alignment");
+ warning (0, "%<__builtin_arc_aligned%> with non-constant alignment");
}
else
{
/* Check alignTest is positive, and a power of two. */
if (alignTest <= 0 || alignTest != (alignTest & -alignTest))
{
- error ("invalid alignment value for __builtin_arc_aligned");
+ error ("invalid alignment value for %<__builtin_arc_aligned%>");
return NULL_RTX;
}
fold (arg0);
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
- if (!CONST_INT_P (op0) || !satisfies_constraint_L (op0))
- {
- error ("builtin operand should be an unsigned 6-bit value");
- return NULL_RTX;
- }
gcc_assert (icode != 0);
emit_insn (GEN_FCN (icode) (op0));
return NULL_RTX;
return false;
}
-/* Check that after all the constant folding, whether the operand to
- __builtin_arc_sleep is an unsigned int of 6 bits. If not, flag an error. */
-
-bool
-check_if_valid_sleep_operand (rtx *operands, int opno)
-{
- switch (GET_CODE (operands[opno]))
- {
- case CONST :
- case CONST_INT :
- if( UNSIGNED_INT6 (INTVAL (operands[opno])))
- return true;
- /* FALLTHRU */
- default:
- fatal_error (input_location,
- "operand for sleep instruction must be an unsigned 6 bit compile-time constant");
- break;
- }
- return false;
-}
-
/* Return true if it is ok to make a tail-call to DECL. */
static bool
-arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
+arc_function_ok_for_sibcall (tree decl,
 tree exp ATTRIBUTE_UNUSED)
{
+ tree attrs = NULL_TREE;
+
 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
 if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
 return false;
+ /* Reject functions carrying attributes that select a special call
+ sequence (JLI table entry or secure call); presumably a sibcall
+ jump would bypass that sequence -- TODO(review): confirm against
+ the call expanders. */
+ if (decl)
+ {
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+
+ if (lookup_attribute ("jli_always", attrs))
+ return false;
+ if (lookup_attribute ("jli_fixed", attrs))
+ return false;
+ if (lookup_attribute ("secure_call", attrs))
+ return false;
+ }
+
 /* Everything else is ok. */
 return true;
}
}
}
-
-/* This was in rtlanal.c, and can go in there when we decide we want
- to submit the change for inclusion in the GCC tree. */
-/* Like note_stores, but allow the callback to have side effects on the rtl
- (like the note_stores of yore):
- Call FUN on each register or MEM that is stored into or clobbered by X.
- (X would be the pattern of an insn). DATA is an arbitrary pointer,
- ignored by note_stores, but passed to FUN.
- FUN may alter parts of the RTL.
-
- FUN receives three arguments:
- 1. the REG, MEM, CC0 or PC being stored in or clobbered,
- 2. the SET or CLOBBER rtx that does the store,
- 3. the pointer DATA provided to note_stores.
-
- If the item being stored in or clobbered is a SUBREG of a hard register,
- the SUBREG will be passed. */
-
-/* For now. */ static
-void
-walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data)
-{
- int i;
-
- if (GET_CODE (x) == COND_EXEC)
- x = COND_EXEC_CODE (x);
-
- if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
- {
- rtx dest = SET_DEST (x);
-
- while ((GET_CODE (dest) == SUBREG
- && (!REG_P (SUBREG_REG (dest))
- || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER))
- || GET_CODE (dest) == ZERO_EXTRACT
- || GET_CODE (dest) == STRICT_LOW_PART)
- dest = XEXP (dest, 0);
-
- /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions,
- each of whose first operand is a register. */
- if (GET_CODE (dest) == PARALLEL)
- {
- for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
- if (XEXP (XVECEXP (dest, 0, i), 0) != 0)
- (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data);
- }
- else
- (*fun) (dest, x, data);
- }
-
- else if (GET_CODE (x) == PARALLEL)
- for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
- walk_stores (XVECEXP (x, 0, i), fun, data);
-}
-
static bool
arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED,
machine_mode mode ATTRIBUTE_UNUSED,
return NULL;
}
+/* Return the next active insn, skipping the inline assembly code.
+ Note that only a single ASM_INPUT insn is stepped over: the check
+ below is not a loop. */
+
+static rtx_insn *
+arc_active_insn (rtx_insn *insn)
+{
+ rtx_insn *nxt = next_active_insn (insn);
+
+ if (nxt && GET_CODE (PATTERN (nxt)) == ASM_INPUT)
+ nxt = next_active_insn (nxt);
+ return nxt;
+}
+
+/* Search for a sequence made out of two (or more) stores followed by
+   a load, and insert nops if required to avoid the store/load cache
+   hazard.  */
+
+static void
+check_store_cacheline_hazard (void)
+{
+  rtx_insn *insn, *succ0, *insn1;
+
+  for (insn = get_insns (); insn; insn = arc_active_insn (insn))
+    {
+      succ0 = arc_active_insn (insn);
+
+      if (!succ0)
+	return;
+
+      if (!single_set (insn) || !single_set (succ0))
+	continue;
+
+      if ((get_attr_type (insn) != TYPE_STORE)
+	  || (get_attr_type (succ0) != TYPE_STORE))
+	continue;
+
+      /* Found at least two consecutive stores.  Skip to the end of
+	 the store sequence.  */
+      for (insn1 = succ0; insn1; insn1 = arc_active_insn (insn1))
+	if (!single_set (insn1) || get_attr_type (insn1) != TYPE_STORE)
+	  break;
+
+      /* Now, check the next two instructions for the following cases:
+	 1. next instruction is a LD => insert 2 nops between store
+	 sequence and load.
+	 2. next-next instruction is a LD => insert 1 nop after the
+	 store sequence.  */
+      if (insn1 && single_set (insn1)
+	  && (get_attr_type (insn1) == TYPE_LOAD))
+	{
+	  emit_insn_before (gen_nopv (), insn1);
+	  emit_insn_before (gen_nopv (), insn1);
+	}
+      else
+	{
+	  if (insn1 && (get_attr_type (insn1) == TYPE_COMPARE))
+	    {
+	      /* REG_SAVE_NOTE is used by Haifa scheduler, we are in
+		 reorg, so it is safe to reuse it for avoiding the
+		 current compare insn to be part of a BRcc
+		 optimization.  */
+	      add_reg_note (insn1, REG_SAVE_NOTE, GEN_INT (3));
+	    }
+	  insn1 = arc_active_insn (insn1);
+	  if (insn1 && single_set (insn1)
+	      && (get_attr_type (insn1) == TYPE_LOAD))
+	    emit_insn_before (gen_nopv (), insn1);
+	}
+
+      /* Resume scanning after the store sequence.  */
+      insn = insn1;
+    }
+}
+
/* Return true if a load instruction (CONSUMER) uses the same address as a
store instruction (PRODUCER). This function is used to avoid st/ld
address hazard in ARC700 cores. */
-bool
-arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
+
+static bool
+arc_store_addr_hazard_internal_p (rtx_insn* producer, rtx_insn* consumer)
{
rtx in_set, out_set;
rtx out_addr, in_addr;
return false;
}
+/* Return TRUE if we have a store address hazard.  */
+
+bool
+arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
+{
+ /* For ARC700 not tuned as ARC7XX, unconditionally report a hazard
+ regardless of PRODUCER/CONSUMER. */
+ if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
+ return true;
+ return arc_store_addr_hazard_internal_p (producer, consumer);
+}
+
/* The same functionality as arc_hazard. It is called in machine
reorg before any other optimization. Hence, the NOP size is taken
into account when doing branch shortening. */
workaround_arc_anomaly (void)
{
rtx_insn *insn, *succ0;
+ rtx_insn *succ1;
/* For any architecture: call arc_hazard here. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
}
}
- if (TARGET_ARC700)
- {
- rtx_insn *succ1;
+ if (!TARGET_ARC700)
+ return;
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- {
- succ0 = next_real_insn (insn);
- if (arc_store_addr_hazard_p (insn, succ0))
- {
- emit_insn_after (gen_nopv (), insn);
- emit_insn_after (gen_nopv (), insn);
- continue;
- }
+ /* Old A7 cores suffer from a cache hazard, and we need to insert two
+ nops between any sequence of stores and a load. */
+ if (arc_tune != ARC_TUNE_ARC7XX)
+ check_store_cacheline_hazard ();
- /* Avoid adding nops if the instruction between the ST and LD is
- a call or jump. */
- succ1 = next_real_insn (succ0);
- if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
- && arc_store_addr_hazard_p (insn, succ1))
- emit_insn_after (gen_nopv (), insn);
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ succ0 = next_real_insn (insn);
+ if (arc_store_addr_hazard_internal_p (insn, succ0))
+ {
+ emit_insn_after (gen_nopv (), insn);
+ emit_insn_after (gen_nopv (), insn);
+ continue;
}
+
+ /* Avoid adding nops if the instruction between the ST and LD is
+ a call or jump. */
+ succ1 = next_real_insn (succ0);
+ if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
+ && arc_store_addr_hazard_internal_p (insn, succ1))
+ emit_insn_after (gen_nopv (), insn);
}
}
rtx test;
rtx insn = loop->loop_end;
- if (TARGET_V2
+ if (TARGET_DBNZ
&& (loop->length && (loop->length <= ARC_MAX_LOOP_LENGTH))
&& REG_P (loop->iter_reg))
{
- /* TARGET_V2 has dbnz instructions. */
+ /* TARGET_V2 core3 has dbnz instructions. */
test = gen_dbnz (loop->iter_reg, loop->start_label);
insn = emit_jump_insn_before (test, loop->loop_end);
}
delete_insn (loop->loop_end);
}
+/* Return the next insn after INSN that is not a NOTE, but stop the
+ search before we enter another basic block. This routine does not
+ look inside SEQUENCEs. Returns NULL when the next non-note insn
+ belongs to the following basic block, or when no insn remains. */
+
+static rtx_insn *
+next_nonnote_insn_bb (rtx_insn *insn)
+{
+ while (insn)
+ {
+ insn = NEXT_INSN (insn);
+ if (insn == 0 || !NOTE_P (insn))
+ break;
+ /* A basic-block note marks the start of the next block; do not
+ cross it. */
+ if (NOTE_INSN_BASIC_BLOCK_P (insn))
+ return NULL;
+ }
+
+ return insn;
+}
+
/* Optimize LOOP. */
static bool
int i;
edge entry_edge;
basic_block entry_bb, bb;
- rtx iter_reg, end_label;
- rtx_insn *insn, *seq, *entry_after, *last_insn;
+ rtx iter_reg;
+ rtx_insn *insn, *seq, *entry_after, *last_insn, *end_label;
unsigned int length;
bool need_fix = false;
rtx lp_reg = gen_rtx_REG (SImode, LP_COUNT);
if (loop->depth > 1)
{
if (dump_file)
- fprintf (dump_file, ";; loop %d is not innermost\n",
- loop->loop_no);
+ fprintf (dump_file, ";; loop %d is not innermost\n",
+ loop->loop_no);
return false;
}
if (!loop->incoming_dest)
{
if (dump_file)
- fprintf (dump_file, ";; loop %d has more than one entry\n",
- loop->loop_no);
+ fprintf (dump_file, ";; loop %d has more than one entry\n",
+ loop->loop_no);
return false;
}
if (loop->incoming_dest != loop->head)
{
if (dump_file)
- fprintf (dump_file, ";; loop %d is not entered from head\n",
- loop->loop_no);
+ fprintf (dump_file, ";; loop %d is not entered from head\n",
+ loop->loop_no);
return false;
}
if (loop->has_call || loop->has_asm)
{
if (dump_file)
- fprintf (dump_file, ";; loop %d has invalid insn\n",
- loop->loop_no);
+ fprintf (dump_file, ";; loop %d has invalid insn\n",
+ loop->loop_no);
return false;
}
if (loop->iter_reg_used || loop->iter_reg_used_outside)
{
if (dump_file)
- fprintf (dump_file, ";; loop %d uses iterator\n",
- loop->loop_no);
+ fprintf (dump_file, ";; loop %d uses iterator\n",
+ loop->loop_no);
return false;
}
for (insn = loop->start_label;
insn && insn != loop->loop_end;
insn = NEXT_INSN (insn))
- length += NONDEBUG_INSN_P (insn) ? get_attr_length (insn) : 0;
+ {
+ length += NONDEBUG_INSN_P (insn) ? get_attr_length (insn) : 0;
+ if (JUMP_TABLES_IN_TEXT_SECTION
+ && JUMP_TABLE_DATA_P (insn))
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has a jump table\n",
+ loop->loop_no);
+ return false;
+ }
+ }
if (!insn)
{
if (dump_file)
- fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
- loop->loop_no);
+ fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
+ loop->loop_no);
return false;
}
fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
return false;
}
+ else if (!loop->length)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d is empty\n", loop->loop_no);
+ return false;
+ }
- /* Check if we use a register or not. */
+ /* Check if we use a register or not. */
if (!REG_P (loop->iter_reg))
{
if (dump_file)
- fprintf (dump_file, ";; loop %d iterator is MEM\n",
- loop->loop_no);
+ fprintf (dump_file, ";; loop %d iterator is MEM\n",
+ loop->loop_no);
+ return false;
+ }
+
+ /* Check if we use a register or not. */
+ if (!REG_P (loop->iter_reg))
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d iterator is MEM\n",
+ loop->loop_no);
return false;
}
|| (loop->incoming_src
&& REGNO_REG_SET_P (df_get_live_out (loop->incoming_src),
LP_COUNT)))
- return false;
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d, lp_count is alive", loop->loop_no);
+ return false;
+ }
else
need_fix = true;
}
&& INSN_P (last_insn)
&& (JUMP_P (last_insn) || CALL_P (last_insn)
|| GET_CODE (PATTERN (last_insn)) == SEQUENCE
- || get_attr_type (last_insn) == TYPE_BRCC
- || get_attr_type (last_insn) == TYPE_BRCC_NO_DELAY_SLOT))
+ /* At this stage we can have (insn (clobber (mem:BLK
+ (reg)))) instructions, ignore them. */
+ || (GET_CODE (PATTERN (last_insn)) != CLOBBER
+ && (get_attr_type (last_insn) == TYPE_BRCC
+ || get_attr_type (last_insn) == TYPE_BRCC_NO_DELAY_SLOT))))
{
if (loop->length + 2 > ARC_MAX_LOOP_LENGTH)
{
loop->loop_no);
last_insn = emit_insn_after (gen_nopv (), last_insn);
}
+
+ /* SAVE_NOTE is used by haifa scheduler. However, we are after it
+ and we can use it to indicate the last ZOL instruction cannot be
+ part of a delay slot. */
+ add_reg_note (last_insn, REG_SAVE_NOTE, GEN_INT (2));
+
loop->last_insn = last_insn;
/* Get the loop iteration register. */
{
/* The loop uses a R-register, but the lp_count is free, thus
use lp_count. */
- emit_insn (gen_movsi (lp_reg, iter_reg));
+ emit_insn (gen_rtx_SET (lp_reg, iter_reg));
SET_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT);
iter_reg = lp_reg;
if (dump_file)
}
}
- insn = emit_insn (gen_arc_lp (iter_reg,
- loop->start_label,
+ insn = emit_insn (gen_arc_lp (loop->start_label,
loop->end_label));
seq = get_insns ();
seq = emit_label_before (gen_label_rtx (), seq);
new_bb = create_basic_block (seq, insn, entry_bb);
FOR_EACH_EDGE (e, ei, loop->incoming)
- {
- if (!(e->flags & EDGE_FALLTHRU))
- redirect_edge_and_branch_force (e, new_bb);
- else
- redirect_edge_succ (e, new_bb);
- }
+ {
+ if (!(e->flags & EDGE_FALLTHRU))
+ redirect_edge_and_branch_force (e, new_bb);
+ else
+ redirect_edge_succ (e, new_bb);
+ }
make_edge (new_bb, loop->head, 0);
}
{
#if 0
while (DEBUG_INSN_P (entry_after)
- || (NOTE_P (entry_after)
- && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK
+ || (NOTE_P (entry_after)
+ && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK
/* Make sure we don't split a call and its corresponding
CALL_ARG_LOCATION note. */
- && NOTE_KIND (entry_after) != NOTE_INSN_CALL_ARG_LOCATION))
+ && NOTE_KIND (entry_after) != NOTE_INSN_CALL_ARG_LOCATION))
entry_after = NEXT_INSN (entry_after);
#endif
entry_after = next_nonnote_insn_bb (entry_after);
emit_insn_before (seq, entry_after);
}
- delete_insn (loop->loop_end);
/* Insert the loop end label before the last instruction of the
loop. */
emit_label_after (end_label, loop->last_insn);
+ /* Make sure we mark the beginning and end label as used. */
+ LABEL_NUSES (loop->end_label)++;
+ LABEL_NUSES (loop->start_label)++;
return true;
}
reorg_loops (true, &arc_doloop_hooks);
}
+/* Scan all calls and add symbols to be emitted in the jli section if
+ needed. */
+
+static void
+jli_call_scan (void)
+{
+ rtx_insn *insn;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (!CALL_P (insn))
+ continue;
+
+ rtx pat = PATTERN (insn);
+ /* Strip any conditional-execution wrapper first. */
+ if (GET_CODE (pat) == COND_EXEC)
+ pat = COND_EXEC_CODE (pat);
+ /* Assumes the call pattern is a vector (PARALLEL) whose first
+ element is the call or (set (reg) (call ...)) --
+ TODO(review): confirm for all ARC call patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) == SET)
+ pat = SET_SRC (pat);
+
+ /* Extract the callee address from (call (mem ADDR) ...). */
+ pat = XEXP (XEXP (pat, 0), 0);
+ if (GET_CODE (pat) == SYMBOL_REF
+ && arc_is_jli_call_p (pat))
+ arc_add_jli_section (pat);
+ }
+}
+
+/* Add padding if necessary to avoid a mispredict. A return could
+ happen immediately after the function start. A call/return and
+ return/return must be 6 bytes apart to avoid mispredict. */
+
+static void
+pad_return (void)
+{
+ rtx_insn *insn;
+ long offset;
+
+ if (!TARGET_PAD_RETURN)
+ return;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx_insn *prev0 = prev_active_insn (insn);
+ bool wantlong = false;
+
+ if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) != SIMPLE_RETURN)
+ continue;
+
+ /* Return at the very start of the function: pad with two nops,
+ the first one tagged with REG_SAVE_NOTE (see below). */
+ if (!prev0)
+ {
+ prev0 = emit_insn_before (gen_nopv (), insn);
+ /* REG_SAVE_NOTE is used by Haifa scheduler, we are in reorg
+ so it is safe to reuse it for forcing a particular length
+ for an instruction. */
+ add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
+ emit_insn_before (gen_nopv (), insn);
+ continue;
+ }
+ /* Accumulate the lengths of up to three preceding active insns
+ to measure the distance before this return. */
+ offset = get_attr_length (prev0);
+
+ if (get_attr_length (prev0) == 2
+ && get_attr_iscompact (prev0) != ISCOMPACT_TRUE)
+ {
+ /* Force long version of the insn. */
+ wantlong = true;
+ offset += 2;
+ }
+
+ rtx_insn *prev = prev_active_insn (prev0);
+ if (prev)
+ offset += get_attr_length (prev);
+
+ prev = prev_active_insn (prev);
+ if (prev)
+ offset += get_attr_length (prev);
+
+ /* Insert padding nops according to the measured distance;
+ presumably this achieves the required 6-byte separation --
+ TODO(review): confirm the note-forced nop length. */
+ switch (offset)
+ {
+ case 2:
+ prev = emit_insn_before (gen_nopv (), insn);
+ add_reg_note (prev, REG_SAVE_NOTE, GEN_INT (1));
+ break;
+ case 4:
+ emit_insn_before (gen_nopv (), insn);
+ break;
+ default:
+ continue;
+ }
+
+ if (wantlong)
+ add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
+
+ /* Emit a blockage to avoid delay slot scheduling. */
+ emit_insn_before (gen_blockage (), insn);
+ }
+}
+
static int arc_reorg_in_progress = 0;
/* ARC's machince specific reorg function. */
arc_reorg_loops ();
workaround_arc_anomaly ();
+ jli_call_scan ();
+ pad_return ();
/* FIXME: should anticipate ccfsm action, generate special patterns for
to-be-deleted branches that have no delay slot and have at least the
if (!link_insn)
continue;
else
- /* Check if this is a data dependency. */
{
+ /* Check if this is a data dependency. */
rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
rtx cmp0, cmp1;
+ /* Make sure we can use it for brcc insns. */
+ if (find_reg_note (link_insn, REG_SAVE_NOTE, GEN_INT (3)))
+ continue;
+
/* Ok this is the set cc. copy args here. */
op = XEXP (pc_target, 0);
Brcc.d b, c, s9
Brcc.d b, u6, s9
- For cc={GT, LE, GTU, LEU}, u6=63 can not be allowed,
+ For cc={GT, LE, GTU, LEU}, u6=63 cannot be allowed,
since they are encoded by the assembler as {GE, LT, HS, LS} 64, which
does not have a delay slot
arc_in_small_data_p (const_tree decl)
{
HOST_WIDE_INT size;
+ tree attr;
/* Only variables are going into small data area. */
if (TREE_CODE (decl) != VAR_DECL)
&& TREE_THIS_VOLATILE (decl))
return false;
+ /* Likewise for uncached data. */
+ attr = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("uncached", attr))
+ return false;
+
+ /* Likewise for aux regs. */
+ attr = DECL_ATTRIBUTES (decl);
+ if (lookup_attribute ("aux", attr))
+ return false;
+
if (DECL_SECTION_NAME (decl) != 0)
{
const char *name = DECL_SECTION_NAME (decl);
return false;
}
-/* Return true if X is a small data address that can be rewritten
- as a gp+symref. */
-
-static bool
-arc_rewrite_small_data_p (const_rtx x)
-{
- if (GET_CODE (x) == CONST)
- x = XEXP (x, 0);
-
- if (GET_CODE (x) == PLUS)
- {
- if (GET_CODE (XEXP (x, 1)) == CONST_INT)
- x = XEXP (x, 0);
- }
-
- if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
- {
- gcc_assert (SYMBOL_REF_TLS_MODEL (x) == 0);
- return true;
- }
- return false;
-}
-
-/* If possible, rewrite OP so that it refers to small data using
- explicit relocations. */
-
-static rtx
-arc_rewrite_small_data_1 (rtx op)
-{
- rtx rgp = gen_rtx_REG (Pmode, SDATA_BASE_REGNUM);
- op = copy_insn (op);
- subrtx_ptr_iterator::array_type array;
- FOR_EACH_SUBRTX_PTR (iter, array, &op, ALL)
- {
- rtx *loc = *iter;
- if (arc_rewrite_small_data_p (*loc))
- {
- *loc = gen_rtx_PLUS (Pmode, rgp, *loc);
- iter.skip_subrtxes ();
- }
- else if (GET_CODE (*loc) == PLUS
- && rtx_equal_p (XEXP (*loc, 0), rgp))
- iter.skip_subrtxes ();
- }
- return op;
-}
-
-rtx
-arc_rewrite_small_data (rtx op)
-{
- op = arc_rewrite_small_data_1 (op);
-
- /* Check if we fit small data constraints. */
- if (MEM_P (op)
- && !LEGITIMATE_SMALL_DATA_ADDRESS_P (XEXP (op, 0)))
- {
- rtx addr = XEXP (op, 0);
- rtx tmp = gen_reg_rtx (Pmode);
- emit_move_insn (tmp, addr);
- op = replace_equiv_address_nv (op, tmp);
- }
- return op;
-}
-
-/* Return true if OP refers to small data symbols directly, not through
- a PLUS. */
-
-bool
-small_data_pattern (rtx op, machine_mode)
-{
- if (GET_CODE (op) == SEQUENCE)
- return false;
-
- rtx rgp = gen_rtx_REG (Pmode, SDATA_BASE_REGNUM);
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, op, ALL)
- {
- const_rtx x = *iter;
- if (GET_CODE (x) == PLUS
- && rtx_equal_p (XEXP (x, 0), rgp))
- iter.skip_subrtxes ();
- else if (arc_rewrite_small_data_p (x))
- return true;
- }
- return false;
-}
-
/* Return true if OP is an acceptable memory operand for ARCompact
16-bit gp-relative load instructions.
- op shd look like : [r26, symref@sda]
- i.e. (mem (plus (reg 26) (symref with smalldata flag set))
- */
+*/
/* volatile cache option still to be handled. */
bool
{
rtx addr;
int size;
- tree decl = NULL_TREE;
int align = 0;
int mask = 0;
/* Decode the address now. */
addr = XEXP (op, 0);
- if (!LEGITIMATE_SMALL_DATA_ADDRESS_P (addr))
+ if (!legitimate_small_data_address_p (addr))
return false;
if (!short_p || size == 1)
/* Now check for the alignment, the short loads using gp require the
addresses to be aligned. */
- if (GET_CODE (XEXP (addr, 1)) == SYMBOL_REF)
- decl = SYMBOL_REF_DECL (XEXP (addr, 1));
- else if (GET_CODE (XEXP (XEXP (XEXP (addr, 1), 0), 0)) == SYMBOL_REF)
- decl = SYMBOL_REF_DECL (XEXP (XEXP (XEXP (addr, 1), 0), 0));
- if (decl)
- align = DECL_ALIGN (decl);
- align = align / BITS_PER_UNIT;
-
+ align = get_symbol_alignment (addr);
switch (mode)
{
case E_HImode:
return false;
}
+/* Return TRUE if PAT is accessing an aux-reg, i.e. a MEM whose
+ underlying declaration carries the "aux" attribute. */
+
+static bool
+arc_is_aux_reg_p (rtx pat)
+{
+ tree attrs = NULL_TREE;
+ tree addr;
+
+ if (!MEM_P (pat))
+ return false;
+
+ /* Get the memory attributes. */
+ addr = MEM_EXPR (pat);
+ if (!addr)
+ return false;
+
+ /* Fetch the attribute list; only plain variables and MEM_REFs are
+ recognized, anything else is not an aux-reg access. */
+ if (TREE_CODE (addr) == VAR_DECL)
+ attrs = DECL_ATTRIBUTES (addr);
+ else if (TREE_CODE (addr) == MEM_REF)
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 0)));
+ else
+ return false;
+
+ if (lookup_attribute ("aux", attrs))
+ return true;
+ return false;
+}
+
/* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL. */
void
unsigned HOST_WIDE_INT align,
unsigned HOST_WIDE_INT globalize_p)
{
- int in_small_data = arc_in_small_data_p (decl);
+ int in_small_data = arc_in_small_data_p (decl);
+ rtx mem = decl == NULL_TREE ? NULL_RTX : DECL_RTL (decl);
+
+ /* Don't output aux-reg symbols. */
+ if (mem != NULL_RTX && MEM_P (mem)
+ && SYMBOL_REF_P (XEXP (mem, 0))
+ && arc_is_aux_reg_p (mem))
+ return;
if (in_small_data)
switch_to_section (get_named_section (NULL, ".sbss", 0));
arc_register_move_cost (machine_mode,
enum reg_class from_class, enum reg_class to_class)
{
- /* The ARC600 has no bypass for extension registers, hence a nop might be
- needed to be inserted after a write so that reads are safe. */
- if (TARGET_ARC600)
- {
- if (to_class == MPY_WRITABLE_CORE_REGS)
- return 3;
- /* Instructions modifying LP_COUNT need 4 additional cycles before
- the register will actually contain the value. */
- else if (to_class == LPCOUNT_REG)
- return 6;
- else if (to_class == WRITABLE_CORE_REGS)
- return 6;
- }
-
- /* Using lp_count as scratch reg is a VERY bad idea. */
- if (from_class == LPCOUNT_REG)
- return 1000;
- if (to_class == LPCOUNT_REG)
- return 6;
-
/* Force an attempt to 'mov Dy,Dx' to spill. */
if ((TARGET_ARC700 || TARGET_EM) && TARGET_DPFP
&& from_class == DOUBLE_REGS && to_class == DOUBLE_REGS)
/* Try to emit a 16 bit opcode with long immediate. */
ret = 6;
if (short_p && match)
- ADDSI_OUTPUT1 ("add%? %0,%1,%S2");
+ ADDSI_OUTPUT1 ("add%? %0,%1,%2");
/* We have to use a 32 bit opcode, and with a long immediate. */
ret = 8;
- ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2");
+ ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%2");
}
/* Emit code for an commutative_cond_exec instruction with OPERANDS.
return true;
}
+/* Given PAT, a MEM whose declaration may carry the "aux" attribute,
+   return TRUE and store the attribute's register-number argument in
+   *AUXR when present.  Return FALSE when there is no "aux" attribute
+   or it has no argument.  */
+
+static bool
+arc_get_aux_arg (rtx pat, int *auxr)
+{
+  tree addr = MEM_EXPR (pat);
+  if (TREE_CODE (addr) != VAR_DECL)
+    return false;
+
+  /* Read the argument from the looked-up attribute node itself.
+     Taking TREE_VALUE of the head of DECL_ATTRIBUTES would fetch the
+     argument of whatever attribute happens to be first in the list,
+     which is wrong when "aux" is not the first attribute.  */
+  tree attr = lookup_attribute ("aux", DECL_ATTRIBUTES (addr));
+  if (attr)
+    {
+      tree arg = TREE_VALUE (attr);
+      if (arg)
+	{
+	  *auxr = TREE_INT_CST_LOW (TREE_VALUE (arg));
+	  return true;
+	}
+    }
+
+  return false;
+}
+
/* Prepare operands for move in MODE. Return true iff the move has
been emitted. */
bool
prepare_move_operands (rtx *operands, machine_mode mode)
{
- /* We used to do this only for MODE_INT Modes, but addresses to floating
- point variables may well be in the small data section. */
- if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode))
- operands[0] = arc_rewrite_small_data (operands[0]);
+ /* First handle aux attribute. */
+ if (mode == SImode
+ && (MEM_P (operands[0]) || MEM_P (operands[1])))
+ {
+ rtx tmp;
+ int auxr = 0;
+ if (MEM_P (operands[0]) && arc_is_aux_reg_p (operands[0]))
+ {
+ /* Save operation. */
+ if (arc_get_aux_arg (operands[0], &auxr))
+ {
+ tmp = gen_reg_rtx (SImode);
+ emit_move_insn (tmp, GEN_INT (auxr));
+ }
+ else
+ {
+ tmp = XEXP (operands[0], 0);
+ }
+
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_rtx_UNSPEC_VOLATILE
+ (VOIDmode, gen_rtvec (2, operands[1], tmp),
+ VUNSPEC_ARC_SR));
+ return true;
+ }
+ if (MEM_P (operands[1]) && arc_is_aux_reg_p (operands[1]))
+ {
+ if (arc_get_aux_arg (operands[1], &auxr))
+ {
+ tmp = gen_reg_rtx (SImode);
+ emit_move_insn (tmp, GEN_INT (auxr));
+ }
+ else
+ {
+ tmp = XEXP (operands[1], 0);
+ gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
+ }
+ /* Load operation. */
+ gcc_assert (REG_P (operands[0]));
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_UNSPEC_VOLATILE
+ (SImode, gen_rtvec (1, tmp),
+ VUNSPEC_ARC_LR)));
+ return true;
+ }
+ }
if (mode == SImode && SYMBOLIC_CONST (operands[1]))
{
here and references the variable directly. */
}
- if (GET_CODE (operands[0]) != MEM
- && !TARGET_NO_SDATA_SET
- && small_data_pattern (operands[1], Pmode))
- {
- /* This is to take care of address calculations involving sdata
- variables. */
- operands[1] = arc_rewrite_small_data (operands[1]);
-
- emit_insn (gen_rtx_SET (operands[0],operands[1]));
- /* ??? This note is useless, since it only restates the set itself.
- We should rather use the original SYMBOL_REF. However, there is
- the problem that we are lying to the compiler about these
- SYMBOL_REFs to start with. symbol@sda should be encoded specially
- so that we can tell it apart from an actual symbol. */
- set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
-
- /* Take care of the REG_EQUAL note that will be attached to mark the
- output reg equal to the initial symbol_ref after this code is
- executed. */
- emit_move_insn (operands[0], operands[0]);
- return true;
- }
-
if (MEM_P (operands[0])
&& !(reload_in_progress || reload_completed))
{
return false;
}
-/* Prepare OPERANDS for an extension using CODE to OMODE.
- Return true iff the move has been emitted. */
-
-bool
-prepare_extend_operands (rtx *operands, enum rtx_code code,
- machine_mode omode)
-{
- if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode))
- {
- /* This is to take care of address calculations involving sdata
- variables. */
- operands[1]
- = gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1]));
- emit_insn (gen_rtx_SET (operands[0], operands[1]));
- set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
-
- /* Take care of the REG_EQUAL note that will be attached to mark the
- output reg equal to the initial extension after this code is
- executed. */
- emit_move_insn (operands[0], operands[0]);
- return true;
- }
- return false;
-}
-
/* Output a library call to a function called FNAME that has been arranged
to be local to any dso. */
return len;
}
-/* Values for length_sensitive. */
-enum
-{
- ARC_LS_NONE,// Jcc
- ARC_LS_25, // 25 bit offset, B
- ARC_LS_21, // 21 bit offset, Bcc
- ARC_LS_U13,// 13 bit unsigned offset, LP
- ARC_LS_10, // 10 bit offset, B_s, Beq_s, Bne_s
- ARC_LS_9, // 9 bit offset, BRcc
- ARC_LS_8, // 8 bit offset, BRcc_s
- ARC_LS_U7, // 7 bit unsigned offset, LPcc
- ARC_LS_7 // 7 bit offset, Bcc_s
-};
-
-/* While the infrastructure patch is waiting for review, duplicate the
- struct definitions, to allow this file to compile. */
-#if 1
-typedef struct
-{
- unsigned align_set;
- /* Cost as a branch / call target or call return address. */
- int target_cost;
- int fallthrough_cost;
- int branch_cost;
- int length;
- /* 0 for not length sensitive, 1 for largest offset range,
- * 2 for next smaller etc. */
- unsigned length_sensitive : 8;
- bool enabled;
-} insn_length_variant_t;
-
-typedef struct insn_length_parameters_s
-{
- int align_unit_log;
- int align_base_log;
- int max_variants;
- int (*get_variants) (rtx_insn *, int, bool, bool, insn_length_variant_t *);
-} insn_length_parameters_t;
-
-static void
-arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED;
-#endif
-
-static int
-arc_get_insn_variants (rtx_insn *insn, int len, bool, bool target_p,
- insn_length_variant_t *ilv)
-{
- if (!NONDEBUG_INSN_P (insn))
- return 0;
- enum attr_type type;
- /* shorten_branches doesn't take optimize_size into account yet for the
- get_variants mechanism, so turn this off for now. */
- if (optimize_size)
- return 0;
- if (rtx_sequence *pat = dyn_cast <rtx_sequence *> (PATTERN (insn)))
- {
- /* The interaction of a short delay slot insn with a short branch is
- too weird for shorten_branches to piece together, so describe the
- entire SEQUENCE. */
- rtx_insn *inner;
- if (TARGET_UPSIZE_DBR
- && get_attr_length (pat->insn (1)) <= 2
- && (((type = get_attr_type (inner = pat->insn (0)))
- == TYPE_UNCOND_BRANCH)
- || type == TYPE_BRANCH)
- && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES)
- {
- int n_variants
- = arc_get_insn_variants (inner, get_attr_length (inner), true,
- target_p, ilv+1);
- /* The short variant gets split into a higher-cost aligned
- and a lower cost unaligned variant. */
- gcc_assert (n_variants);
- gcc_assert (ilv[1].length_sensitive == ARC_LS_7
- || ilv[1].length_sensitive == ARC_LS_10);
- gcc_assert (ilv[1].align_set == 3);
- ilv[0] = ilv[1];
- ilv[0].align_set = 1;
- ilv[0].branch_cost += 1;
- ilv[1].align_set = 2;
- n_variants++;
- for (int i = 0; i < n_variants; i++)
- ilv[i].length += 2;
- /* In case an instruction with aligned size is wanted, and
- the short variants are unavailable / too expensive, add
- versions of long branch + long delay slot. */
- for (int i = 2, end = n_variants; i < end; i++, n_variants++)
- {
- ilv[n_variants] = ilv[i];
- ilv[n_variants].length += 2;
- }
- return n_variants;
- }
- return 0;
- }
- insn_length_variant_t *first_ilv = ilv;
- type = get_attr_type (insn);
- bool delay_filled
- = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES);
- int branch_align_cost = delay_filled ? 0 : 1;
- int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1;
- /* If the previous instruction is an sfunc call, this insn is always
- a target, even though the middle-end is unaware of this. */
- bool force_target = false;
- rtx_insn *prev = prev_active_insn (insn);
- if (prev && arc_next_active_insn (prev, 0) == insn
- && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
- ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
- NON_SIBCALL)
- : (CALL_ATTR (prev, NON_SIBCALL)
- && NEXT_INSN (PREV_INSN (prev)) == prev)))
- force_target = true;
-
- switch (type)
- {
- case TYPE_BRCC:
- /* Short BRCC only comes in no-delay-slot version, and without limm */
- if (!delay_filled)
- {
- ilv->align_set = 3;
- ilv->length = 2;
- ilv->branch_cost = 1;
- ilv->enabled = (len == 2);
- ilv->length_sensitive = ARC_LS_8;
- ilv++;
- }
- /* Fall through. */
- case TYPE_BRCC_NO_DELAY_SLOT:
- /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for
- (delay slot) scheduling purposes, but they are longer. */
- if (GET_CODE (PATTERN (insn)) == PARALLEL
- && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET)
- return 0;
- /* Standard BRCC: 4 bytes, or 8 bytes with limm. */
- ilv->length = ((type == TYPE_BRCC) ? 4 : 8);
- ilv->align_set = 3;
- ilv->branch_cost = branch_align_cost;
- ilv->enabled = (len <= ilv->length);
- ilv->length_sensitive = ARC_LS_9;
- if ((target_p || force_target)
- || (!delay_filled && TARGET_UNALIGN_BRANCH))
- {
- ilv[1] = *ilv;
- ilv->align_set = 1;
- ilv++;
- ilv->align_set = 2;
- ilv->target_cost = 1;
- ilv->branch_cost = branch_unalign_cost;
- }
- ilv++;
-
- rtx op, op0;
- op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
- op0 = XEXP (op, 0);
-
- if (GET_CODE (op0) == ZERO_EXTRACT
- && satisfies_constraint_L (XEXP (op0, 2)))
- op0 = XEXP (op0, 0);
- if (satisfies_constraint_Rcq (op0))
- {
- ilv->length = ((type == TYPE_BRCC) ? 6 : 10);
- ilv->align_set = 3;
- ilv->branch_cost = 1 + branch_align_cost;
- ilv->fallthrough_cost = 1;
- ilv->enabled = true;
- ilv->length_sensitive = ARC_LS_21;
- if (!delay_filled && TARGET_UNALIGN_BRANCH)
- {
- ilv[1] = *ilv;
- ilv->align_set = 1;
- ilv++;
- ilv->align_set = 2;
- ilv->branch_cost = 1 + branch_unalign_cost;
- }
- ilv++;
- }
- ilv->length = ((type == TYPE_BRCC) ? 8 : 12);
- ilv->align_set = 3;
- ilv->branch_cost = 1 + branch_align_cost;
- ilv->fallthrough_cost = 1;
- ilv->enabled = true;
- ilv->length_sensitive = ARC_LS_21;
- if ((target_p || force_target)
- || (!delay_filled && TARGET_UNALIGN_BRANCH))
- {
- ilv[1] = *ilv;
- ilv->align_set = 1;
- ilv++;
- ilv->align_set = 2;
- ilv->target_cost = 1;
- ilv->branch_cost = 1 + branch_unalign_cost;
- }
- ilv++;
- break;
-
- case TYPE_SFUNC:
- ilv->length = 12;
- goto do_call;
- case TYPE_CALL_NO_DELAY_SLOT:
- ilv->length = 8;
- goto do_call;
- case TYPE_CALL:
- ilv->length = 4;
- ilv->length_sensitive
- = GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25;
- do_call:
- ilv->align_set = 3;
- ilv->fallthrough_cost = branch_align_cost;
- ilv->enabled = true;
- if ((target_p || force_target)
- || (!delay_filled && TARGET_UNALIGN_BRANCH))
- {
- ilv[1] = *ilv;
- ilv->align_set = 1;
- ilv++;
- ilv->align_set = 2;
- ilv->target_cost = 1;
- ilv->fallthrough_cost = branch_unalign_cost;
- }
- ilv++;
- break;
- case TYPE_UNCOND_BRANCH:
- /* Strictly speaking, this should be ARC_LS_10 for equality comparisons,
- but that makes no difference at the moment. */
- ilv->length_sensitive = ARC_LS_7;
- ilv[1].length_sensitive = ARC_LS_25;
- goto do_branch;
- case TYPE_BRANCH:
- ilv->length_sensitive = ARC_LS_10;
- ilv[1].length_sensitive = ARC_LS_21;
- do_branch:
- ilv->align_set = 3;
- ilv->length = 2;
- ilv->branch_cost = branch_align_cost;
- ilv->enabled = (len == ilv->length);
- ilv++;
- ilv->length = 4;
- ilv->align_set = 3;
- ilv->branch_cost = branch_align_cost;
- ilv->enabled = true;
- if ((target_p || force_target)
- || (!delay_filled && TARGET_UNALIGN_BRANCH))
- {
- ilv[1] = *ilv;
- ilv->align_set = 1;
- ilv++;
- ilv->align_set = 2;
- ilv->target_cost = 1;
- ilv->branch_cost = branch_unalign_cost;
- }
- ilv++;
- break;
- case TYPE_JUMP:
- return 0;
- default:
- /* For every short insn, there is generally also a long insn.
- trap_s is an exception. */
- if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s)
- return 0;
- ilv->align_set = 3;
- ilv->length = len;
- ilv->enabled = 1;
- ilv++;
- ilv->align_set = 3;
- ilv->length = len + 2;
- ilv->enabled = 1;
- if (target_p || force_target)
- {
- ilv[1] = *ilv;
- ilv->align_set = 1;
- ilv++;
- ilv->align_set = 2;
- ilv->target_cost = 1;
- }
- ilv++;
- }
- /* If the previous instruction is an sfunc call, this insn is always
- a target, even though the middle-end is unaware of this.
- Therefore, if we have a call predecessor, transfer the target cost
- to the fallthrough and branch costs. */
- if (force_target)
- {
- for (insn_length_variant_t *p = first_ilv; p < ilv; p++)
- {
- p->fallthrough_cost += p->target_cost;
- p->branch_cost += p->target_cost;
- p->target_cost = 0;
- }
- }
-
- return ilv - first_ilv;
-}
-
-static void
-arc_insn_length_parameters (insn_length_parameters_t *ilp)
-{
- ilp->align_unit_log = 1;
- ilp->align_base_log = 1;
- ilp->max_variants = 7;
- ilp->get_variants = arc_get_insn_variants;
-}
-
/* Return a copy of COND from *STATEP, inverted if that is indicated by the
CC field of *STATEP. */
return orig_x;
}
-static rtx
-arc_delegitimize_address_0 (rtx x)
-{
- rtx u, gp, p;
+static rtx
+arc_delegitimize_address_0 (rtx op)
+{
+ switch (GET_CODE (op))
+ {
+ case CONST:
+ return arc_delegitimize_address_0 (XEXP (op, 0));
+
+ case UNSPEC:
+ switch (XINT (op, 1))
+ {
+ case ARC_UNSPEC_GOT:
+ case ARC_UNSPEC_GOTOFFPC:
+ return XVECEXP (op, 0, 0);
+ default:
+ break;
+ }
+ break;
+
+ case PLUS:
+ {
+ rtx t1 = arc_delegitimize_address_0 (XEXP (op, 0));
+ rtx t2 = XEXP (op, 1);
- if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC)
- {
- if (XINT (u, 1) == ARC_UNSPEC_GOT
- || XINT (u, 1) == ARC_UNSPEC_GOTOFFPC)
- return XVECEXP (u, 0, 0);
+ if (t1 && t2)
+ return gen_rtx_PLUS (GET_MODE (op), t1, t2);
+ break;
+ }
+
+ default:
+ break;
}
- else if (GET_CODE (x) == CONST && GET_CODE (p = XEXP (x, 0)) == PLUS
- && GET_CODE (u = XEXP (p, 0)) == UNSPEC
- && (XINT (u, 1) == ARC_UNSPEC_GOT
- || XINT (u, 1) == ARC_UNSPEC_GOTOFFPC))
- return gen_rtx_CONST
- (GET_MODE (x),
- gen_rtx_PLUS (GET_MODE (p), XVECEXP (u, 0, 0), XEXP (p, 1)));
- else if (GET_CODE (x) == PLUS
- && ((REG_P (gp = XEXP (x, 0))
- && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
- || (GET_CODE (gp) == CONST
- && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
- && XINT (u, 1) == ARC_UNSPEC_GOT
- && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
- && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
- && GET_CODE (XEXP (x, 1)) == CONST
- && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
- && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
- return XVECEXP (u, 0, 0);
- else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
- && ((REG_P (gp = XEXP (XEXP (x, 0), 1))
- && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
- || (GET_CODE (gp) == CONST
- && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
- && XINT (u, 1) == ARC_UNSPEC_GOT
- && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
- && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
- && GET_CODE (XEXP (x, 1)) == CONST
- && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
- && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
- return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0),
- XVECEXP (u, 0, 0));
- else if (GET_CODE (x) == PLUS
- && (u = arc_delegitimize_address_0 (XEXP (x, 1))))
- return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u);
return NULL_RTX;
}
static rtx
-arc_delegitimize_address (rtx x)
+arc_delegitimize_address (rtx orig_x)
{
- rtx orig_x = x = delegitimize_mem_from_attrs (x);
- if (GET_CODE (x) == MEM)
+ rtx x = orig_x;
+
+ if (MEM_P (x))
x = XEXP (x, 0);
+
x = arc_delegitimize_address_0 (x);
- if (x)
- {
- if (MEM_P (orig_x))
- x = replace_equiv_address_nv (orig_x, x);
- return x;
- }
- return orig_x;
+ if (!x)
+ return orig_x;
+
+ if (MEM_P (orig_x))
+ x = replace_equiv_address_nv (orig_x, x);
+ return x;
}
/* Return a REG rtx for acc1. N.B. the gcc-internal representation may
return !optimize_size && arc_reorg_in_progress;
}
-/* We are about to output a return insn. Add padding if necessary to avoid
- a mispredict. A return could happen immediately after the function
- start, but after a call we know that there will be at least a blink
- restore. */
-
-void
-arc_pad_return (void)
-{
- rtx_insn *insn = current_output_insn;
- rtx_insn *prev = prev_active_insn (insn);
- int want_long;
-
- if (!prev)
- {
- fputs ("\tnop_s\n", asm_out_file);
- cfun->machine->unalign ^= 2;
- want_long = 1;
- }
- /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
- because after a call, we'd have to restore blink first. */
- else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
- return;
- else
- {
- want_long = (get_attr_length (prev) == 2);
- prev = prev_active_insn (prev);
- }
- if (!prev
- || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
- ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
- NON_SIBCALL)
- : CALL_ATTR (prev, NON_SIBCALL)))
- {
- if (want_long)
- cfun->machine->size_reason
- = "call/return and return/return must be 6 bytes apart to avoid mispredict";
- else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
- {
- cfun->machine->size_reason
- = "Long unaligned jump avoids non-delay slot penalty";
- want_long = 1;
- }
- /* Disgorge delay insn, if there is any, and it may be moved. */
- if (final_sequence
- /* ??? Annulled would be OK if we can and do conditionalize
- the delay slot insn accordingly. */
- && !INSN_ANNULLED_BRANCH_P (insn)
- && (get_attr_cond (insn) != COND_USE
- || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
- XVECEXP (final_sequence, 0, 1))))
- {
- prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
- gcc_assert (!prev_real_insn (insn)
- || !arc_hazard (prev_real_insn (insn), prev));
- cfun->machine->force_short_suffix = !want_long;
- rtx save_pred = current_insn_predicate;
- final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
- cfun->machine->force_short_suffix = -1;
- prev->set_deleted ();
- current_output_insn = insn;
- current_insn_predicate = save_pred;
- }
- else if (want_long)
- fputs ("\tnop\n", asm_out_file);
- else
- {
- fputs ("\tnop_s\n", asm_out_file);
- cfun->machine->unalign ^= 2;
- }
- }
- return;
-}
-
/* The usual; we set up our machine_function data. */
static struct machine_function *
struct machine_function *machine;
machine = ggc_cleared_alloc<machine_function> ();
machine->fn_type = ARC_FUNCTION_UNKNOWN;
- machine->force_short_suffix = -1;
return machine;
}
static bool
arc_process_double_reg_moves (rtx *operands)
{
- rtx dest = operands[0];
- rtx src = operands[1];
-
enum usesDxState { none, srcDx, destDx, maxDx };
enum usesDxState state = none;
+ rtx dest = operands[0];
+ rtx src = operands[1];
if (refers_to_regno_p (40, 44, src, 0))
- state = srcDx;
+ {
+ state = srcDx;
+ gcc_assert (REG_P (dest));
+ }
if (refers_to_regno_p (40, 44, dest, 0))
{
/* Via arc_register_move_cost, we should never see D,D moves. */
+ gcc_assert (REG_P (src));
gcc_assert (state == none);
state = destDx;
}
if (TARGET_LL64
&& ((memory_operand (operands[0], mode)
- && even_register_operand (operands[1], mode))
+ && (even_register_operand (operands[1], mode)
+ || satisfies_constraint_Cm3 (operands[1])))
|| (memory_operand (operands[1], mode)
&& even_register_operand (operands[0], mode))))
{
/* Return the integer value of the "type" attribute for INSN, or -1 if
INSN can't have attributes. */
-int
+static int
arc_attr_type (rtx_insn *insn)
{
if (NONJUMP_INSN_P (insn)
return cfun->machine->arc_reorg_started;
}
+/* Code has a minimum p2 alignment of 1, which we must restore after
+ an ADDR_DIFF_VEC. */
+
int
arc_label_align (rtx_insn *label)
{
- /* Code has a minimum p2 alignment of 1, which we must restore after an
- ADDR_DIFF_VEC. */
- if (align_labels_log < 1)
+ if (align_labels.levels[0].log < 1)
{
rtx_insn *next = next_nonnote_nondebug_insn (label);
if (INSN_P (next) && recog_memoized (next) >= 0)
return 1;
}
- return align_labels_log;
+ return align_labels.levels[0].log;
}
/* Return true if LABEL is in executable code. */
case TYPE_BRANCH:
if (get_attr_length (u.r) != 2)
break;
+ /* Fall through. */
case TYPE_BRCC:
case TYPE_BRCC_NO_DELAY_SLOT:
return false;
if (ARC_INTERRUPT_P (fn_type))
{
- if (((fn_type & ARC_FUNCTION_ILINK1) | ARC_FUNCTION_FIRQ) != 0)
- regno = ILINK1_REGNUM;
+ if ((fn_type & (ARC_FUNCTION_ILINK1 | ARC_FUNCTION_FIRQ)) != 0)
+ regno = ILINK1_REG;
else if ((fn_type & ARC_FUNCTION_ILINK2) != 0)
- regno = ILINK2_REGNUM;
+ regno = ILINK2_REG;
else
- gcc_unreachable ();
+ gcc_unreachable ();
}
else if (ARC_NORMAL_P (fn_type) || ARC_NAKED_P (fn_type))
regno = RETURN_ADDR_REGNUM;
return false;
}
-#ifndef TARGET_NO_LRA
-#define TARGET_NO_LRA !TARGET_LRA
-#endif
+/* Return true if we use LRA instead of reload pass. */
-static bool
+bool
arc_lra_p (void)
{
- return !TARGET_NO_LRA;
+ return arc_lra_flag;
}
/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to use
if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET)
return false;
+ /* Likewise for uncached types. */
+ if (arc_is_uncached_mem_p (op))
+ return false;
+
if (mode == VOIDmode)
mode = GET_MODE (op);
return false;
}
+/* Return the frame pointer value to be backed up in the setjmp buffer. */
+
+static rtx
+arc_builtin_setjmp_frame_value (void)
+{
+ /* We always want to preserve whatever value is currently in the frame
+ pointer register. For frames that are using the frame pointer the new
+ value of the frame pointer register will have already been computed
+ (as part of the prologue). For frames that are not using the frame
+ pointer it is important that we backup whatever value is in the frame
+ pointer register, as earlier (more outer) frames may have placed a
+ value into the frame pointer register. It might be tempting to try
+ and use `frame_pointer_rtx` here, however, this is not what we want.
+ For frames that are using the frame pointer this will give the
+ correct value. However, for frames that are not using the frame
+ pointer this will still give the value that _would_ have been the
+ frame pointer value for this frame (if the use of the frame pointer
+ had not been removed). We really do want the raw frame pointer
+ register value. */
+ return gen_raw_REG (Pmode, FRAME_POINTER_REGNUM);
+}
+
+/* Return nonzero if a jli call should be generated for a call from
+ the current function to the function referenced by symbol PAT. */
+
+bool
+arc_is_jli_call_p (rtx pat)
+{
+ tree attrs;
+ tree decl = SYMBOL_REF_DECL (pat);
+
+ /* If it is not a well defined public function then return false. */
+ if (!decl || !SYMBOL_REF_FUNCTION_P (pat) || !TREE_PUBLIC (decl))
+ return false;
+
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("jli_always", attrs))
+ return true;
+
+ if (lookup_attribute ("jli_fixed", attrs))
+ return true;
+
+ return TARGET_JLI_ALWAYS;
+}
+
+/* Handle a "jli" attribute; arguments as in struct
+ attribute_spec.handler. */
+
+static tree
+arc_handle_jli_attribute (tree *node ATTRIBUTE_UNUSED,
+ tree name, tree args, int,
+ bool *no_add_attrs)
+{
+ if (!TARGET_V2)
+ {
+ warning (OPT_Wattributes,
+ "%qE attribute only valid for ARCv2 architecture",
+ name);
+ *no_add_attrs = true;
+ }
+
+ if (args == NULL_TREE)
+ {
+ warning (OPT_Wattributes,
+ "argument of %qE attribute is missing",
+ name);
+ *no_add_attrs = true;
+ }
+ else
+ {
+ if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
+ TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
+ tree arg = TREE_VALUE (args);
+ if (TREE_CODE (arg) != INTEGER_CST)
+ {
+ warning (0, "%qE attribute allows only an integer constant argument",
+ name);
+ *no_add_attrs = true;
+ }
+ /* FIXME! add range check. TREE_INT_CST_LOW (arg) */
+ }
+ return NULL_TREE;
+}
+
+/* Handle a "secure" attribute; arguments as in struct
+ attribute_spec.handler. */
+
+static tree
+arc_handle_secure_attribute (tree *node ATTRIBUTE_UNUSED,
+ tree name, tree args, int,
+ bool *no_add_attrs)
+{
+ if (!TARGET_EM)
+ {
+ warning (OPT_Wattributes,
+ "%qE attribute only valid for ARC EM architecture",
+ name);
+ *no_add_attrs = true;
+ }
+
+ if (args == NULL_TREE)
+ {
+ warning (OPT_Wattributes,
+ "argument of %qE attribute is missing",
+ name);
+ *no_add_attrs = true;
+ }
+ else
+ {
+ if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
+ TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
+ tree arg = TREE_VALUE (args);
+ if (TREE_CODE (arg) != INTEGER_CST)
+ {
+ warning (0, "%qE attribute allows only an integer constant argument",
+ name);
+ *no_add_attrs = true;
+ }
+ }
+ return NULL_TREE;
+}
+
+/* Return nonzero if the symbol is a secure function. */
+
+bool
+arc_is_secure_call_p (rtx pat)
+{
+ tree attrs;
+ tree decl = SYMBOL_REF_DECL (pat);
+
+ if (!decl)
+ return false;
+
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("secure_call", attrs))
+ return true;
+
+ return false;
+}
+
+/* Handle "uncached" qualifier. */
+
+static tree
+arc_handle_uncached_attribute (tree *node,
+ tree name, tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (DECL_P (*node) && TREE_CODE (*node) != TYPE_DECL)
+ {
+ error ("%qE attribute only applies to types",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (args)
+ {
+ warning (OPT_Wattributes, "argument of %qE attribute ignored", name);
+ }
+ return NULL_TREE;
+}
+
+/* Return TRUE if PAT is a memory addressing an uncached data. */
+
+bool
+arc_is_uncached_mem_p (rtx pat)
+{
+ tree attrs = NULL_TREE;
+ tree addr;
+
+ if (!MEM_P (pat))
+ return false;
+
+ /* Get the memory attributes. */
+ addr = MEM_EXPR (pat);
+ if (!addr)
+ return false;
+
+ /* Get the attributes. */
+ if (TREE_CODE (addr) == MEM_REF)
+ {
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (addr));
+ if (lookup_attribute ("uncached", attrs))
+ return true;
+
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 0)));
+ if (lookup_attribute ("uncached", attrs))
+ return true;
+ }
+
+ /* For COMPONENT_REF, use the FIELD_DECL from tree operand 1. */
+ if (TREE_CODE (addr) == COMPONENT_REF)
+ {
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (TREE_OPERAND (addr, 1)));
+ if (lookup_attribute ("uncached", attrs))
+ return true;
+ }
+ return false;
+}
+
+/* Handle aux attribute. The auxiliary registers are addressed using
+ special instructions lr and sr. The attribute 'aux' indicates if a
+ variable refers to the aux-regs and what is the register number
+ desired. */
+
+static tree
+arc_handle_aux_attribute (tree *node,
+ tree name, tree args, int,
+ bool *no_add_attrs)
+{
+ /* Isn't it better to use address spaces for the aux-regs? */
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) != VAR_DECL)
+ {
+ error ("%qE attribute only applies to variables", name);
+ *no_add_attrs = true;
+ }
+ else if (args)
+ {
+ if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
+ TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
+ tree arg = TREE_VALUE (args);
+ if (TREE_CODE (arg) != INTEGER_CST)
+ {
+ warning (OPT_Wattributes, "%qE attribute allows only an integer "
+ "constant argument", name);
+ *no_add_attrs = true;
+ }
+ /* FIXME! add range check. TREE_INT_CST_LOW (arg) */
+ }
+
+ if (TREE_CODE (*node) == VAR_DECL)
+ {
+ tree fntype = TREE_TYPE (*node);
+ if (fntype && TREE_CODE (fntype) == POINTER_TYPE)
+ {
+ tree attrs = tree_cons (get_identifier ("aux"), NULL_TREE,
+ TYPE_ATTRIBUTES (fntype));
+ TYPE_ATTRIBUTES (fntype) = attrs;
+ }
+ }
+ }
+ return NULL_TREE;
+}
+
/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P. We don't want to use
anchors for small data: the GP register acts as an anchor in that
case. We also don't want to use them for PC-relative accesses,
return default_use_anchors_for_symbol_p (symbol);
}
+/* Return true if SUBST can't safely replace its equivalent during RA. */
+static bool
+arc_cannot_substitute_mem_equiv_p (rtx)
+{
+ /* If SUBST is mem[base+index], the address may not fit ISA,
+ thus return true. */
+ return true;
+}
+
+/* Checks whether the operands are valid for use in an LDD/STD
+ instruction. Assumes that RT, and RT2 are REG. This is guaranteed
+ by the patterns. Assumes that the address in the base register RN
+ is word aligned. Pattern guarantees that both memory accesses use
+ the same base register, the offsets are constants within the range,
+ and the gap between the offsets is 4. If reload complete then
+ check that registers are legal. */
+
+static bool
+operands_ok_ldd_std (rtx rt, rtx rt2, HOST_WIDE_INT offset)
+{
+ unsigned int t, t2;
+
+ if (!reload_completed)
+ return true;
+
+ if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & (~0x03),
+ (offset & (GET_MODE_SIZE (DImode) - 1) & 3
+ ? 0 : -(-GET_MODE_SIZE (DImode) | (~0x03)) >> 1))))
+ return false;
+
+ t = REGNO (rt);
+ t2 = REGNO (rt2);
+
+ if ((t2 == PCL_REG)
+ || (t % 2 != 0) /* First destination register is not even. */
+ || (t2 != t + 1))
+ return false;
+
+ return true;
+}
+
+/* Helper for gen_operands_ldd_std. Returns true iff the memory
+ operand MEM's address contains an immediate offset from the base
+ register and has no side effects, in which case it sets BASE and
+ OFFSET accordingly. */
+
+static bool
+mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset)
+{
+ rtx addr;
+
+ gcc_assert (base != NULL && offset != NULL);
+
+ /* TODO: Handle more general memory operand patterns, such as
+ PRE_DEC and PRE_INC. */
+
+ if (side_effects_p (mem))
+ return false;
+
+ /* Can't deal with subregs. */
+ if (GET_CODE (mem) == SUBREG)
+ return false;
+
+ gcc_assert (MEM_P (mem));
+
+ *offset = const0_rtx;
+
+ addr = XEXP (mem, 0);
+
+ /* If addr isn't valid for DImode, then we can't handle it. */
+ if (!arc_legitimate_address_p (DImode, addr,
+ reload_in_progress || reload_completed))
+ return false;
+
+ if (REG_P (addr))
+ {
+ *base = addr;
+ return true;
+ }
+ else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+ {
+ *base = XEXP (addr, 0);
+ *offset = XEXP (addr, 1);
+ return (REG_P (*base) && CONST_INT_P (*offset));
+ }
+
+ return false;
+}
+
+/* Called from peephole2 to replace two word-size accesses with a
+ single LDD/STD instruction. Returns true iff we can generate a new
+ instruction sequence. That is, both accesses use the same base
+ register and the gap between constant offsets is 4. OPERANDS are
+ the operands found by the peephole matcher; OPERANDS[0,1] are
+ register operands, and OPERANDS[2,3] are the corresponding memory
+ operands. LOAD indicates whether the access is load or store. */
+
+bool
+gen_operands_ldd_std (rtx *operands, bool load, bool commute)
+{
+ int i, gap;
+ HOST_WIDE_INT offsets[2], offset;
+ int nops = 2;
+ rtx cur_base, cur_offset, tmp;
+ rtx base = NULL_RTX;
+
+ /* Check that the memory references are immediate offsets from the
+ same base register. Extract the base register, the destination
+ registers, and the corresponding memory offsets. */
+ for (i = 0; i < nops; i++)
+ {
+ if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset))
+ return false;
+
+ if (i == 0)
+ base = cur_base;
+ else if (REGNO (base) != REGNO (cur_base))
+ return false;
+
+ offsets[i] = INTVAL (cur_offset);
+ if (GET_CODE (operands[i]) == SUBREG)
+ {
+ tmp = SUBREG_REG (operands[i]);
+ gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
+ operands[i] = tmp;
+ }
+ }
+
+ /* Make sure there is no dependency between the individual loads. */
+ if (load && REGNO (operands[0]) == REGNO (base))
+ return false; /* RAW. */
+
+ if (load && REGNO (operands[0]) == REGNO (operands[1]))
+ return false; /* WAW. */
+
+ /* Make sure the instructions are ordered with lower memory access first. */
+ if (offsets[0] > offsets[1])
+ {
+ gap = offsets[0] - offsets[1];
+ offset = offsets[1];
+
+ /* Swap the instructions such that lower memory is accessed first. */
+ std::swap (operands[0], operands[1]);
+ std::swap (operands[2], operands[3]);
+ }
+ else
+ {
+ gap = offsets[1] - offsets[0];
+ offset = offsets[0];
+ }
+
+ /* Make sure accesses are to consecutive memory locations. */
+ if (gap != 4)
+ return false;
+
+ /* Make sure we generate legal instructions. */
+ if (operands_ok_ldd_std (operands[0], operands[1], offset))
+ return true;
+
+ if (load && commute)
+ {
+ /* Try reordering registers. */
+ std::swap (operands[0], operands[1]);
+ if (operands_ok_ldd_std (operands[0], operands[1], offset))
+ return true;
+ }
+
+ return false;
+}
+
#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
#define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p
+#undef TARGET_CONSTANT_ALIGNMENT
+#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
+
+#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
+#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P arc_cannot_substitute_mem_equiv_p
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE arc_asm_trampoline_template
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-arc.h"