/* Output routines for GCC for ARM.
- Copyright (C) 1991-2022 Free Software Foundation, Inc.
+ Copyright (C) 1991-2024 Free Software Foundation, Inc.
Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
and Martin Simmons (@harleqn.co.uk).
More major hacks by Richard Earnshaw (rearnsha@arm.com).
#include "optabs-libfuncs.h"
#include "gimplify.h"
#include "gimple.h"
+#include "gimple-iterator.h"
#include "selftest.h"
#include "tree-vectorizer.h"
#include "opts.h"
+#include "aarch-common.h"
+#include "aarch-common-protos.h"
/* This file should be included last. */
#include "target-def.h"
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);
-static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
- const vec_perm_indices &);
+static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
+ rtx, const vec_perm_indices &);
static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
vec<machine_mode> &,
vec<const char *> &, vec<rtx> &,
- HARD_REG_SET &, location_t);
+ vec<rtx> &, HARD_REG_SET &, location_t);
static const char *arm_identify_fpu_from_isa (sbitmap);
\f
/* Table of machine attributes. */
-static const struct attribute_spec arm_attribute_table[] =
+static const attribute_spec arm_gnu_attributes[] =
{
/* { name, min_len, max_len, decl_req, type_req, fn_type_req,
affects_type_identity, handler, exclude } */
/* ARMv8-M Security Extensions support. */
{ "cmse_nonsecure_entry", 0, 0, true, false, false, false,
arm_handle_cmse_nonsecure_entry, NULL },
- { "cmse_nonsecure_call", 0, 0, true, false, false, true,
+ { "cmse_nonsecure_call", 0, 0, false, false, false, true,
arm_handle_cmse_nonsecure_call, NULL },
- { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
- { NULL, 0, 0, false, false, false, false, NULL, NULL }
+ { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
+};
+
+static const scoped_attribute_specs arm_gnu_attribute_table =
+{
+ "gnu", { arm_gnu_attributes }
+};
+
+static const scoped_attribute_specs *const arm_attribute_table[] =
+{
+ &arm_gnu_attribute_table
};
\f
/* Initialize the GCC target structure. */
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
+#undef TARGET_GIMPLE_FOLD_BUILTIN
+#define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
+
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS
-#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
-#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
- arm_builtin_vectorized_function
-
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
+
+#undef TARGET_STACK_PROTECT_GUARD
+#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
+
+#undef TARGET_VECTORIZE_GET_MASK_MODE
+#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
\f
/* Obstack for minipool constant handling. */
static struct obstack minipool_obstack;
/* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
int arm_arch8_4 = 0;
+
+/* Nonzero if this chip supports the ARM Architecture 8-M Mainline
+ extensions. */
+int arm_arch8m_main = 0;
+
/* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
extensions. */
int arm_arch8_1m_main = 0;
speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
+/* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
+static bool
+arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
+{
+ gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
+ tree fndecl = gimple_call_fndecl (stmt);
+ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
+ unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
+ gimple *new_stmt = NULL;
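+ /* The low bits of the MD function code select the builtin class; only
+ MVE builtins currently provide gimple folds. */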
+ switch (code & ARM_BUILTIN_CLASS)
+ {
+ case ARM_BUILTIN_GENERAL:
+ break;
+ case ARM_BUILTIN_MVE:
+ new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
+ }
+ if (!new_stmt)
+ return false;
+
+ gsi_replace (gsi, new_stmt, true);
+ return true;
+}
+
/* On AAPCS systems, this is the "struct __va_list". */
static GTY(()) tree va_list_type;
if (TARGET_THUMB2_P (opts->x_target_flags))
opts->x_inline_asm_unified = true;
+ if (arm_stack_protector_guard == SSP_GLOBAL
+ && opts->x_arm_stack_protector_guard_offset_str)
+ {
+ error ("incompatible options %<-mstack-protector-guard=global%> and "
+ "%<-mstack-protector-guard-offset=%s%>",
+ arm_stack_protector_guard_offset_str);
+ }
+
+ if (opts->x_arm_stack_protector_guard_offset_str)
+ {
+ char *end;
+ const char *str = arm_stack_protector_guard_offset_str;
+ errno = 0;
+ long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
+ if (!*str || *end || errno)
+ error ("%qs is not a valid offset in %qs", str,
+ "-mstack-protector-guard-offset=");
+ arm_stack_protector_guard_offset = offs;
+ }
+
+ if (arm_current_function_pac_enabled_p ())
+ {
+ if (!arm_arch8m_main)
+ error ("This architecture does not support branch protection "
+ "instructions");
+ if (TARGET_TPCS_FRAME)
+ sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
+ }
+
#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
tune_opts = strchr (opts->x_arm_tune_string, '+');
}
+ if (opts->x_arm_branch_protection_string)
+ {
+ aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string,
+ "-mbranch-protection=");
+
+ if (aarch_ra_sign_key != AARCH_KEY_A)
+ {
+ warning (0, "invalid key type for %<-mbranch-protection=%>");
+ aarch_ra_sign_key = AARCH_KEY_A;
+ }
+ }
+
if (arm_selected_arch)
{
arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
+ arm_arch8m_main = arm_arch7 && arm_arch_cmse;
arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
if (target_thread_pointer == TP_AUTO)
{
if (arm_arch6k && !TARGET_THUMB1)
- target_thread_pointer = TP_CP15;
+ target_thread_pointer = TP_TPIDRURO;
else
target_thread_pointer = TP_SOFT;
}
+
+ if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
+ error("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
}
/* Perform some validation between the desired architecture and the rest of the
if (!reload_completed)
return 0;
+ /* Never use a return instruction when the return address signing
+ mechanism is enabled, as the return then requires more than one
+ instruction. */
+ if (arm_current_function_pac_enabled_p ())
+ return 0;
+
func_type = arm_current_func_type ();
/* Naked, volatile and stack alignment functions need special
specification, DECL is the specific declaration. DECL may be null if
the call could be indirect or if this is a library call. */
static enum arm_pcs
-arm_get_pcs_model (const_tree type, const_tree decl)
+arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
{
bool user_convention = false;
enum arm_pcs user_pcs = arm_pcs_default;
return ARM_PCS_AAPCS;
else if (user_convention)
return user_pcs;
+#if 0
+ /* Unfortunately, this is not safe and can lead to wrong code
+ being generated (PR96882). Not all calls into the back-end
+ pass the DECL, so it is unsafe to make any PCS-changing
+ decisions based on it. In particular the RETURN_IN_MEMORY
+ hook is only ever passed a TYPE. This needs revisiting to
+ see if there are any partial improvements that can be
+ re-enabled. */
else if (decl && flag_unit_at_a_time)
{
/* Local functions never leak outside this compilation unit,
if (local_info_node && local_info_node->local)
return ARM_PCS_AAPCS_LOCAL;
}
+#endif
}
else if (user_convention && user_pcs != arm_pcs_default)
sorry ("PCS variant");
a HFA or HVA. */
const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
+const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
/* Walk down the type tree of TYPE counting consecutive base elements.
If *MODEP is VOIDmode, then set it to the first valid floating point
continue;
}
}
+ /* A zero-width bitfield may affect layout in some
+ circumstances, but adds no members. The determination
+ of whether or not a type is an HFA is performed after
+ layout is complete, so if the type still looks like an
+ HFA afterwards, it is still classed as one. This is
+ potentially an ABI break for the hard-float ABI. */
+ else if (DECL_BIT_FIELD (field)
+ && integer_zerop (DECL_SIZE (field)))
+ {
+ /* Prior to GCC-12 these fields were stripped early,
+ hiding them from the back-end entirely and
+ resulting in the correct behaviour for argument
+ passing. Simulate that old behaviour without
+ generating a warning. */
+ if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
+ continue;
+ if (warn_psabi_flags)
+ {
+ *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
+ continue;
+ }
+ }
sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
warn_psabi_flags);
&& ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
!= ag_count))
{
- const char *url
+ const char *url10
= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
+ const char *url12
+ = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
gcc_assert (alt == -1);
last_reported_type_uid = uid;
/* Use TYPE_MAIN_VARIANT to strip any redundant const
inform (input_location, "parameter passing for argument of "
"type %qT with %<[[no_unique_address]]%> members "
"changed %{in GCC 10.1%}",
- TYPE_MAIN_VARIANT (type), url);
+ TYPE_MAIN_VARIANT (type), url10);
else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
inform (input_location, "parameter passing for argument of "
"type %qT when C++17 is enabled changed to match "
"C++14 %{in GCC 10.1%}",
- TYPE_MAIN_VARIANT (type), url);
+ TYPE_MAIN_VARIANT (type), url10);
+ else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
+ inform (input_location, "parameter passing for argument of "
+ "type %qT changed %{in GCC 12.1%}",
+ TYPE_MAIN_VARIANT (type), url12);
}
*count = ag_count;
}
}
else
{
- if (TREE_CODE (*node) == FUNCTION_TYPE
- || TREE_CODE (*node) == METHOD_TYPE)
+ if (FUNC_OR_METHOD_TYPE_P (*node))
{
if (arm_isr_value (args) == ARM_FT_UNKNOWN)
{
}
}
else if (TREE_CODE (*node) == POINTER_TYPE
- && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
- || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
+ && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
&& arm_isr_value (args) != ARM_FT_UNKNOWN)
{
*node = build_variant_type_copy (*node);
int /* flags */,
bool *no_add_attrs)
{
- tree decl = NULL_TREE, fntype = NULL_TREE;
- tree type;
+ tree decl = NULL_TREE;
+ tree fntype, type;
if (!use_cmse)
{
return NULL_TREE;
}
- if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
+ if (DECL_P (*node))
{
- decl = *node;
- fntype = TREE_TYPE (decl);
+ fntype = TREE_TYPE (*node);
+
+ if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
+ decl = *node;
}
+ else
+ fntype = *node;
- while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
+ while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
fntype = TREE_TYPE (fntype);
- if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
+ if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
{
warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
"function pointer", name);
/* Prevent trees being shared among function types with and without
cmse_nonsecure_call attribute. */
- type = TREE_TYPE (decl);
+ if (decl)
+ {
+ type = build_distinct_type_copy (TREE_TYPE (decl));
+ TREE_TYPE (decl) = type;
+ }
+ else
+ {
+ type = build_distinct_type_copy (*node);
+ *node = type;
+ }
- type = build_distinct_type_copy (type);
- TREE_TYPE (decl) = type;
fntype = type;
while (TREE_CODE (fntype) != FUNCTION_TYPE)
/* Add __attribute__ ((long_call)) to all functions, when
inside #pragma long_calls or __attribute__ ((short_call)),
when inside #pragma no_long_calls. */
- if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
+ if (FUNC_OR_METHOD_TYPE_P (type))
{
tree type_attr_list, attr_name;
type_attr_list = TYPE_ATTRIBUTES (type);
}
+/* Generate insns that produce the address of the stack canary. */
+rtx
+arm_stack_protect_tls_canary_mem (bool reload)
+{
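+ /* Load the thread pointer, using the reload-safe pattern when RELOAD is
+ set, then compute TP plus the configured guard offset and return a MEM
+ referencing the canary word. */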
+ rtx tp = gen_reg_rtx (SImode);
+ if (reload)
+ emit_insn (gen_reload_tp_hard (tp));
+ else
+ emit_insn (gen_load_tp_hard (tp));
+
+ rtx reg = gen_reg_rtx (SImode);
+ rtx offset = GEN_INT (arm_stack_protector_guard_offset);
+ emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
+ return gen_rtx_MEM (SImode, reg);
+}
+
/* Whether a register is callee saved or not. This is necessary because high
registers are marked as caller saved when optimizing for size on Thumb-1
targets despite being callee saved in order to avoid using them. */
|| !DECL_COMMON (SYMBOL_REF_DECL (orig))))
{
tree decl = SYMBOL_REF_DECL (orig);
- tree init = (TREE_CODE (decl) == VAR_DECL)
+ tree init = VAR_P (decl)
? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
? decl : 0;
int reloc = 0;
if (init && init != error_mark_node)
reloc = compute_reloc_for_constant (init);
- named_section = TREE_CODE (decl) == VAR_DECL
+ named_section = VAR_P (decl)
&& lookup_attribute ("section", DECL_ATTRIBUTES (decl));
readonly = decl_readonly_section (decl, reloc);
bool use_ldrd;
enum rtx_code code = GET_CODE (x);
+ /* If we are dealing with an MVE predicate mode, then treat it as HImode,
+ as we can store and load it like any other 16-bit value. */
+ if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
+ mode = HImode;
+
if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
return mve_vector_mem_operand (mode, x, strict_p);
else if (REG_P (XEXP (x, 0))
&& (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
|| REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
- || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
- && REGNO (XEXP (x, 0))
- <= LAST_VIRTUAL_POINTER_REGISTER))
+ || VIRTUAL_REGISTER_P (XEXP (x, 0)))
&& GET_MODE_SIZE (mode) >= 4
&& CONST_INT_P (XEXP (x, 1))
&& (INTVAL (XEXP (x, 1)) & 3) == 0)
}
bool
-arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
+arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
{
if (TARGET_ARM)
return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
return true;
}
+/* Helper for arm_bfi_p. */
+static bool
+arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
+{
+ unsigned HOST_WIDE_INT const1;
+ unsigned HOST_WIDE_INT const2 = 0;
+
+ if (!CONST_INT_P (XEXP (op0, 1)))
+ return false;
+
+ const1 = UINTVAL (XEXP (op0, 1));
+ if (!CONST_INT_P (XEXP (op1, 1))
+ || ~UINTVAL (XEXP (op1, 1)) != const1)
+ return false;
+
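+ /* If the inserted value is shifted into position, record the shift count
+ (the LSB of the field) and strip the shift to find the source. */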
+ if (GET_CODE (XEXP (op0, 0)) == ASHIFT
+ && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
+ {
+ const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
+ *sub0 = XEXP (XEXP (op0, 0), 0);
+ }
+ else
+ *sub0 = XEXP (op0, 0);
+
+ if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
+ return false;
+
+ *sub1 = XEXP (op1, 0);
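+ /* CONST1 must be a contiguous run of ones whose lowest set bit is at
+ position CONST2: if so, adding 1 << CONST2 yields a power of two. */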
+ return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
+}
+
+/* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
+ format looks something like:
+
+ (IOR (AND (reg1) (~const1))
+ (AND (ASHIFT (reg2) (const2))
+ (const1)))
+
+ where const1 is a consecutive sequence of 1-bits with the
+ least-significant non-zero bit starting at bit position const2. If
+ const2 is zero, then the shift will not appear at all, due to
+ canonicalization. The two arms of the IOR expression may be
+ flipped. */
+static bool
+arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
+{
+ if (GET_CODE (x) != IOR)
+ return false;
+ if (GET_CODE (XEXP (x, 0)) != AND
+ || GET_CODE (XEXP (x, 1)) != AND)
+ return false;
+ return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
+ || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
+}
+
/* RTX costs. Make an estimate of the cost of executing the operation
X, which is contained within an operation with code OUTER_CODE.
SPEED_P indicates whether the cost desired is the performance cost,
*cost = LIBCALL_COST (2);
return false;
case IOR:
- if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
- {
- if (speed_p)
- *cost += extra_cost->alu.rev;
+ {
+ rtx sub0, sub1;
+ if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
+ {
+ if (speed_p)
+ *cost += extra_cost->alu.rev;
- return true;
- }
- /* Fall through. */
+ return true;
+ }
+ else if (mode == SImode && arm_arch_thumb2
+ && arm_bfi_p (x, &sub0, &sub1))
+ {
+ *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
+ *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
+ if (speed_p)
+ *cost += extra_cost->alu.bfi;
+
+ return true;
+ }
+ }
+
+ /* Fall through. */
case AND: case XOR:
if (mode == SImode)
{
innersize = GET_MODE_UNIT_SIZE (mode);
/* Only support 128-bit vectors for MVE. */
- if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
+ if (TARGET_HAVE_MVE
+ && (!vector
+ || VALID_MVE_PRED_MODE (mode)
+ || n_elts * innersize != 16))
+ return -1;
+
+ if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
return -1;
/* Vectors of float constants. */
return gen_vec_duplicate (mode, x);
}
+/* Return a HI representation of CONST_VEC suitable for MVE predicates. */
+rtx
+mve_bool_vec_to_const (rtx const_vec)
+{
+ machine_mode mode = GET_MODE (const_vec);
+
+ if (!VECTOR_MODE_P (mode))
+ return const_vec;
+
+ unsigned n_elts = GET_MODE_NUNITS (mode);
+ unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
+ unsigned shift_c = 16 / n_elts;
+ unsigned i;
+ int hi_val = 0;
+
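+ /* Pack the low bits of each element into the low 16 bits of the result,
+ one group of 16/N_ELTS bits per lane, reversing the lane order on
+ big-endian targets. */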
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx el = CONST_VECTOR_ELT (const_vec, i);
+ unsigned HOST_WIDE_INT elpart;
+
+ gcc_assert (CONST_INT_P (el));
+ elpart = INTVAL (el) & ((1U << el_prec) - 1);
+
+ unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
+
+ hi_val |= elpart << (index * shift_c);
+ }
+ /* This constant is encoded via a 32-bit mov immediate, so the top 16 bits
+ must all be zero; otherwise we cannot guarantee that the immediate is
+ actually encodable. */
+ return gen_int_mode (hi_val, SImode);
+}
+
/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
into a register.
&& simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
/* Load using VMOV. On Cortex-A8 this takes one cycle. */
return const_vec;
+ else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
+ return mve_bool_vec_to_const (const_vec);
else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
/* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
pipeline cycle; creating the constant takes one or two ARM
return arm_coproc_mem_operand_wb (op, 0);
}
+/* In non-STRICT mode, return the register number of OP. In STRICT mode,
+ return the hard regno OP has been renumbered to, if it has been assigned
+ one; otherwise return the original pseudo number. */
+static int
+arm_effective_regno (rtx op, bool strict)
+{
+ gcc_assert (REG_P (op));
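+ /* After register allocation, reg_renumber maps each pseudo to its
+ assigned hard register; an entry of -1 means the pseudo lives in memory
+ instead. */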
+ if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
+ || !reg_renumber || reg_renumber[REGNO (op)] < 0)
+ return REGNO (op);
+ return reg_renumber[REGNO (op)];
+}
+
/* This function returns TRUE on matching mode and op.
1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
/* Match: (mem (reg)). */
if (REG_P (op))
{
- int reg_no = REGNO (op);
+ reg_no = arm_effective_regno (op, strict);
return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
? reg_no <= LAST_LO_REGNUM
- :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
+ : reg_no < LAST_ARM_REGNUM)
|| (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
}
code = GET_CODE (op);
- if (code == POST_INC || code == PRE_DEC
- || code == PRE_INC || code == POST_DEC)
+ if ((code == POST_INC
+ || code == PRE_DEC
+ || code == PRE_INC
+ || code == POST_DEC)
+ && REG_P (XEXP (op, 0)))
{
- reg_no = REGNO (XEXP (op, 0));
- return ((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
- ? reg_no <= LAST_LO_REGNUM
- :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
- || reg_no >= FIRST_PSEUDO_REGISTER;
+ reg_no = arm_effective_regno (XEXP (op, 0), strict);
+ return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
+ ? reg_no <= LAST_LO_REGNUM
+ : (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
+ || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
}
else if (((code == POST_MODIFY || code == PRE_MODIFY)
&& GET_CODE (XEXP (op, 1)) == PLUS
|| (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
&& GET_CODE (XEXP (op, 1)) == CONST_INT))
{
- reg_no = REGNO (XEXP (op, 0));
+ reg_no = arm_effective_regno (XEXP (op, 0), strict);
if (code == PLUS)
val = INTVAL (XEXP (op, 1));
else
case E_V16QImode:
case E_V8QImode:
case E_V4QImode:
- if (abs (val) <= 127)
- return (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
- || reg_no >= FIRST_PSEUDO_REGISTER;
- return FALSE;
+ if (abs (val) > 127)
+ return FALSE;
+ break;
case E_V8HImode:
case E_V8HFmode:
case E_V4HImode:
case E_V4HFmode:
- if (val % 2 == 0 && abs (val) <= 254)
- return reg_no <= LAST_LO_REGNUM
- || reg_no >= FIRST_PSEUDO_REGISTER;
- return FALSE;
+ if (val % 2 != 0 || abs (val) > 254)
+ return FALSE;
+ break;
case E_V4SImode:
case E_V4SFmode:
- if (val % 4 == 0 && abs (val) <= 508)
- return (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
- || reg_no >= FIRST_PSEUDO_REGISTER;
- return FALSE;
+ if (val % 4 != 0 || abs (val) > 508)
+ return FALSE;
+ break;
default:
return FALSE;
}
+ return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
+ || (MVE_STN_LDW_MODE (mode)
+ ? reg_no <= LAST_LO_REGNUM
+ : (reg_no < LAST_ARM_REGNUM
+ && (code == PLUS || reg_no != SP_REGNUM))));
}
return FALSE;
}
return FALSE;
}
+/* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
+ type. */
+int
+mve_struct_mem_operand (rtx op)
+{
+ rtx ind = XEXP (op, 0);
+
+ /* Match: (mem (reg)). */
+ if (REG_P (ind))
+ return arm_address_register_rtx_p (ind, 0);
+
+ /* Allow only post-increment by the mode size. */
+ if (GET_CODE (ind) == POST_INC)
+ return arm_address_register_rtx_p (XEXP (ind, 0), 0);
+
+ return FALSE;
+}
+
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
type. */
int
{
return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
|| REGNO (x) == ARG_POINTER_REGNUM
- || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
- && REGNO (x) <= LAST_VIRTUAL_REGISTER));
+ || VIRTUAL_REGISTER_P (x));
}
/* Return GENERAL_REGS if a scratch register required to reload x to/from
{
len -= 8;
reg0 = gen_reg_rtx (DImode);
- rtx low_reg = NULL_RTX;
- rtx hi_reg = NULL_RTX;
+ rtx first_reg = NULL_RTX;
+ rtx second_reg = NULL_RTX;
if (!src_aligned || !dst_aligned)
{
- low_reg = gen_lowpart (SImode, reg0);
- hi_reg = gen_highpart_mode (SImode, DImode, reg0);
+ if (BYTES_BIG_ENDIAN)
+ {
+ second_reg = gen_lowpart (SImode, reg0);
+ first_reg = gen_highpart_mode (SImode, DImode, reg0);
+ }
+ else
+ {
+ first_reg = gen_lowpart (SImode, reg0);
+ second_reg = gen_highpart_mode (SImode, DImode, reg0);
+ }
}
if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
emit_move_insn (reg0, src);
emit_insn (gen_unaligned_loaddi (reg0, src));
else
{
- emit_insn (gen_unaligned_loadsi (low_reg, src));
+ emit_insn (gen_unaligned_loadsi (first_reg, src));
src = next_consecutive_mem (src);
- emit_insn (gen_unaligned_loadsi (hi_reg, src));
+ emit_insn (gen_unaligned_loadsi (second_reg, src));
}
if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
emit_insn (gen_unaligned_storedi (dst, reg0));
else
{
- emit_insn (gen_unaligned_storesi (dst, low_reg));
+ emit_insn (gen_unaligned_storesi (dst, first_reg));
dst = next_consecutive_mem (dst);
- emit_insn (gen_unaligned_storesi (dst, hi_reg));
+ emit_insn (gen_unaligned_storesi (dst, second_reg));
}
src = next_consecutive_mem (src);
save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
+ if (arm_current_function_pac_enabled_p ())
+ save_reg_mask |= 1 << IP_REGNUM;
+
/* Decide if we need to save the link register.
Interrupt routines have their own banked link register,
so they never need to save it.
{
if (mask & (1 << i))
{
- reg = gen_rtx_REG (SImode, i);
+ /* NOTE: The DWARF code emitter handles reg-reg copies correctly. In the
+ following example the reg-reg copy of SP to IP is handled through the
+ .cfi_def_cfa_register directive, and the .cfi_offset directive for the
+ IP register is skipped by the DWARF code emitter.
+ Example:
+ mov ip, sp
+ .cfi_def_cfa_register 12
+ push {fp, ip, lr, pc}
+ .cfi_offset 11, -16
+ .cfi_offset 13, -12
+ .cfi_offset 14, -8
+
+ The Arm-specific .save directive handling, however, differs from that of
+ the DWARF code emitter in that it does not consider reg-reg copies while
+ updating the register list. When PACBTI is enabled we therefore manually
+ update the .save directive register list to use "ra_auth_code" (pseudo
+ register 143) instead of the IP register, as shown in the following
+ pseudo code.
+ Example:
+ pacbti ip, lr, sp
+ .cfi_register 143, 12
+ push {r3, r7, ip, lr}
+ .save {r3, r7, ra_auth_code, lr}
+ */
+ rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
+ if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
+ dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
XVECEXP (par, 0, 0)
= gen_rtx_SET (gen_frame_mem
if (dwarf_regs_mask & (1 << i))
{
tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
- reg);
+ dwarf_reg);
RTX_FRAME_RELATED_P (tmp) = 1;
XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
}
{
if (mask & (1 << i))
{
- reg = gen_rtx_REG (SImode, i);
+ rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
+ if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
+ dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
(SImode,
plus_constant (Pmode, stack_pointer_rtx,
4 * j)),
- reg);
+ dwarf_reg);
RTX_FRAME_RELATED_P (tmp) = 1;
XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
}
for (j = 0, i = 0; j < num_regs; i++)
if (saved_regs_mask & (1 << i))
{
- reg = gen_rtx_REG (SImode, i);
+ rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
+ if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
+ dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
if ((num_regs == 1) && emit_update && !return_in_pc)
{
/* Emit single load with writeback. */
gen_rtx_POST_INC (Pmode,
stack_pointer_rtx));
tmp = emit_insn (gen_rtx_SET (reg, tmp));
- REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
+ dwarf);
return;
}
/* We need to maintain a sequence for DWARF info too. As dwarf info
should not have PC, skip PC. */
if (i != PC_REGNUM)
- dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
j++;
}
/* The static chain register is the same as the IP register. If it is
clobbered when creating the frame, we need to save and restore it. */
- clobber_ip = IS_NESTED (func_type)
- && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
- || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
- || flag_stack_clash_protection)
- && !df_regs_ever_live_p (LR_REGNUM)
- && arm_r3_live_at_start_p ()));
+ clobber_ip = (IS_NESTED (func_type)
+ && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
+ || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+ || flag_stack_clash_protection)
+ && !df_regs_ever_live_p (LR_REGNUM)
+ && arm_r3_live_at_start_p ()))
+ || arm_current_function_pac_enabled_p ()));
/* Find somewhere to store IP whilst the frame is being created.
We try the following places in order:
{
rtx addr, dwarf;
- gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
saved_regs += 4;
addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
-fp_offset));
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+ if (arm_current_function_pac_enabled_p ())
+ cfun->machine->pacspval_needed = 1;
}
else
{
RTX_FRAME_RELATED_P (insn) = 1;
fp_offset = args_to_push;
args_to_push = 0;
+ if (arm_current_function_pac_enabled_p ())
+ cfun->machine->pacspval_needed = 1;
}
}
+ if (arm_current_function_pac_enabled_p ())
+ {
+ /* If IP was clobbered we only emit a PAC instruction as the BTI
+ one will be added before the push of the clobbered IP (if
+ necessary) by the bti pass. */
+ if (aarch_bti_enabled () && !clobber_ip)
+ insn = emit_insn (gen_pacbti_nop ());
+ else
+ insn = emit_insn (gen_pac_nop ());
+
+ rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_REGISTER, dwarf);
+ }
+
if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
{
if (IS_INTERRUPT (func_type))
/* Globally reserved letters: acln
Punctuation letters currently used: @_|?().!#
Lower case letters currently used: bcdefhimpqtvwxyz
- Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU
- Letters previously used, but now deprecated/obsolete: sVWXYZ.
+ Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
+ Letters previously used, but now deprecated/obsolete: sWXYZ.
Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
If CODE is 'N' then X is a floating point operand that must be negated
before output.
If CODE is 'B' then output a bitwise inverted value of X (a const int).
- If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
+ If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
+ If CODE is 'V', then the operand must be a CONST_INT representing
+ the bits to preserve in the modified register (Rd) of a BFI or BFC
+ instruction: print out both the width and lsb (shift) fields. */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
stream);
return;
- case 's':
case 'V':
+ {
+ /* Output the LSB (shift) and width for a bitmask instruction
+ based on a literal mask. The LSB is printed first,
+ followed by the width.
+
+ E.g. for 0b1...1110001, the result is #1, #3. */
+ if (!CONST_INT_P (x))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ unsigned HOST_WIDE_INT val
+ = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
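+ /* VAL is the complement of X within 32 bits, i.e. the mask of bits the
+ BFI/BFC writes; VAL & -VAL isolates its lowest set bit. */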
+ int lsb = exact_log2 (val & -val);
+ asm_fprintf (stream, "#%d, #%d", lsb,
+ (exact_log2 (val + (val & -val)) - lsb));
+ }
+ return;
+
+ case 's':
case 'W':
case 'X':
case 'Y':
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
+ if (IS_VPR_REGNUM (regno))
+ return CEIL (GET_MODE_SIZE (mode), 2);
+
if (TARGET_32BIT
&& regno > PC_REGNUM
&& regno != FRAME_POINTER_REGNUM
return false;
if (IS_VPR_REGNUM (regno))
- return mode == HImode;
+ return VALID_MVE_PRED_MODE (mode);
if (TARGET_THUMB1)
/* For the Thumb we only allow values bigger than SImode in
if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
return true;
+ if (TARGET_HAVE_MVE
+ && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
+ return true;
+
/* We specifically want to allow elements of "structure" modes to
be tieable to the structure. This more general condition allows
other rarer situations too. */
if (IS_VPR_REGNUM (regno))
return VPR_REG;
+ if (IS_PAC_REGNUM (regno))
+ return PAC_REG;
+
if (TARGET_THUMB1)
{
if (regno == STACK_POINTER_REGNUM)
machine->func_type = ARM_FT_UNKNOWN;
#endif
machine->static_chain_stack_bytes = -1;
+ machine->pacspval_needed = 0;
return machine;
}
to assert it for now to ensure that future code changes do not silently
change this behavior. */
gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
- if (num_regs == 1)
+ if (arm_current_function_pac_enabled_p ())
+ {
+ gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
+ arm_emit_multi_reg_pop (saved_regs_mask);
+ emit_insn (gen_aut_nop ());
+ emit_jump_insn (simple_return_rtx);
+ }
+ else if (num_regs == 1)
{
rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
&& really_return
&& crtl->args.pretend_args_size == 0
&& saved_regs_mask & (1 << LR_REGNUM)
- && !crtl->calls_eh_return)
+ && !crtl->calls_eh_return
+ && !arm_current_function_pac_enabled_p ())
{
saved_regs_mask &= ~(1 << LR_REGNUM);
saved_regs_mask |= (1 << PC_REGNUM);
}
}
+ if (arm_current_function_pac_enabled_p ())
+ emit_insn (gen_aut_nop ());
+
if (!really_return)
return;
arm_file_start (void)
{
int val;
+ bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
+ bool bti = (aarch_enable_bti == 1);
arm_print_asm_arch_directives
(asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
(int) arm_fp16_format);
+ if (TARGET_HAVE_PACBTI)
+ {
+ arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
+ arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
+ }
+ else if (pac || bti)
+ {
+ arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
+ arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
+ }
+
+ if (bti)
+ arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
+ if (pac)
+ arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
+
if (arm_lang_output_object_attributes_hook)
arm_lang_output_object_attributes_hook();
}
if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
{
nregs = pcum->aapcs_ncrn;
- if (nregs & 1)
+ if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
+ && (nregs & 1))
{
int res = arm_needs_doubleword_align (arg.mode, arg.type);
if (res < 0 && warn_psabi)
return true;
if (TARGET_HAVE_MVE
- && (mode == V2DImode || mode == V4SImode || mode == V8HImode
- || mode == V16QImode))
- return true;
+ && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
+ return true;
if (TARGET_HAVE_MVE_FLOAT
&& (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
- return true;
+ return true;
return false;
}
|| rclass == CC_REG)
return true;
- return false;
+ return default_class_likely_spilled_p (rclass);
}
/* Implements target hook small_register_classes_for_mode_p. */
/* Map internal gcc register numbers to DWARF2 register numbers. */
unsigned int
-arm_dbx_register_number (unsigned int regno)
+arm_debugger_regno (unsigned int regno)
{
if (regno < 16)
return regno;
if (IS_IWMMXT_REGNUM (regno))
return 112 + regno - FIRST_IWMMXT_REGNUM;
+ if (IS_PAC_REGNUM (regno))
+ return DWARF_PAC_REGNUM;
+
return DWARF_FRAME_REGISTERS;
}
gcc_assert (nregs);
reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
- if (reg < 16)
+ if (reg < 16 || IS_PAC_REGNUM (reg))
{
/* For -Os dummy registers can be pushed at the beginning to
avoid separate stack pointer adjustment. */
double precision register names. */
if (IS_VFP_REGNUM (reg))
asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
+ else if (IS_PAC_REGNUM (reg))
+ asm_fprintf (out_file, "ra_auth_code");
else
asm_fprintf (out_file, "%r", reg);
/* Move from sp to reg. */
asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
}
- else if (GET_CODE (e1) == PLUS
+ else if (GET_CODE (e1) == PLUS
&& REG_P (XEXP (e1, 0))
&& REGNO (XEXP (e1, 0)) == SP_REGNUM
&& CONST_INT_P (XEXP (e1, 1)))
asm_fprintf (out_file, "\t.movsp %r, #%d\n",
REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
}
+ else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
+ {
+ if (cfun->machine->pacspval_needed)
+ asm_fprintf (out_file, "\t.pacspval\n");
+ }
else
abort ();
break;
src = SET_SRC (pat);
dest = SET_DEST (pat);
- gcc_assert (src == stack_pointer_rtx);
+ gcc_assert (src == stack_pointer_rtx
+ || IS_PAC_REGNUM (REGNO (src)));
reg = REGNO (dest);
- asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
- reg + 0x90, reg);
+
+ if (IS_PAC_REGNUM (REGNO (src)))
+ arm_unwind_emit_set (out_file, PATTERN (insn));
+ else
+ asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
+ reg + 0x90, reg);
}
handled_one = true;
break;
return "";
}
+/* Output an arm casesi dispatch sequence. Used by the arm_casesi_internal
+ insn to handle the dispatch for switch statements on Arm. */
+const char *
+arm_output_casesi (rtx *operands)
+{
+ char label[100];
+ rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
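+ /* operands[0] is the index, operands[1] the upper bound, operands[2] the
+ dispatch table label and operands[3] the default label; operands[4] is a
+ scratch register and operands[5] holds the table base address. */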
+ output_asm_insn ("cmp\t%0, %1", operands);
+ output_asm_insn ("bhi\t%l3", operands);
+ ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
+ switch (GET_MODE (diff_vec))
+ {
+ case E_QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
+ else
+ output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
+ output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
+ break;
+ case E_HImode:
+ if (REGNO (operands[4]) != REGNO (operands[5]))
+ {
+ output_asm_insn ("add\t%4, %0, %0", operands);
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
+ else
+ output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
+ }
+ else
+ {
+ output_asm_insn ("add\t%4, %5, %0", operands);
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
+ else
+ output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
+ }
+ output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
+ break;
+ case E_SImode:
+ if (flag_pic)
+ {
+ output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
+ output_asm_insn ("add\t%|pc, %|pc, %4", operands);
+ }
+ else
+ output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ assemble_label (asm_out_file, label);
+ output_asm_insn ("nop", operands);
+ return "";
+}
+
/* Output a Thumb-1 casesi dispatch sequence. */
const char *
thumb1_output_casesi (rtx *operands)
return "St9__va_list";
/* Half-precision floating point types. */
- if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+ if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
{
+ if (TYPE_MAIN_VARIANT (type) == float16_type_node)
+ return NULL;
if (TYPE_MODE (type) == BFmode)
return "u6__bf16";
else
arm_post_atomic_barrier (model);
}
\f
+/* Return the mode for the MVE vector of predicates corresponding to MODE. */
+opt_machine_mode
+arm_mode_to_pred_mode (machine_mode mode)
+{
+ switch (GET_MODE_NUNITS (mode))
+ {
+ case 16: return V16BImode;
+ case 8: return V8BImode;
+ case 4: return V4BImode;
+ case 2: return V2QImode;
+ }
+ return opt_machine_mode ();
+}
+
/* Expand code to compare vectors OP0 and OP1 using condition CODE.
If CAN_INVERT, store either the result or its inverse in TARGET
and return true if TARGET contains the inverse. If !CAN_INVERT,
always store the result in TARGET, never its inverse.
- If VCOND_MVE, do not emit the vpsel instruction here, let arm_expand_vcond do
- it with the right destination type to avoid emiting two vpsel, one here and
- one in arm_expand_vcond.
-
Note that the handling of floating-point comparisons is not
IEEE compliant. */
bool
arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
- bool can_invert, bool vcond_mve)
+ bool can_invert)
{
machine_mode cmp_result_mode = GET_MODE (target);
machine_mode cmp_mode = GET_MODE (op0);
and then store its inverse in TARGET. This avoids reusing
TARGET (which for integer NE could be one of the inputs). */
rtx tmp = gen_reg_rtx (cmp_result_mode);
- if (arm_expand_vector_compare (tmp, code, op0, op1, true, vcond_mve))
+ if (arm_expand_vector_compare (tmp, code, op0, op1, true))
gcc_unreachable ();
emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
return false;
case NE:
if (TARGET_HAVE_MVE)
{
- rtx vpr_p0;
- if (vcond_mve)
- vpr_p0 = target;
- else
- vpr_p0 = gen_reg_rtx (HImode);
-
switch (GET_MODE_CLASS (cmp_mode))
{
case MODE_VECTOR_INT:
- emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+ emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
+ op0, force_reg (cmp_mode, op1)));
break;
case MODE_VECTOR_FLOAT:
if (TARGET_HAVE_MVE_FLOAT)
- emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+ emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
+ op0, force_reg (cmp_mode, op1)));
else
gcc_unreachable ();
break;
default:
gcc_unreachable ();
}
-
- /* If we are not expanding a vcond, build the result here. */
- if (!vcond_mve)
- {
- rtx zero = gen_reg_rtx (cmp_result_mode);
- rtx one = gen_reg_rtx (cmp_result_mode);
- emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
- emit_move_insn (one, CONST1_RTX (cmp_result_mode));
- emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
- }
}
else
emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
case GEU:
case GTU:
if (TARGET_HAVE_MVE)
- {
- rtx vpr_p0;
- if (vcond_mve)
- vpr_p0 = target;
- else
- vpr_p0 = gen_reg_rtx (HImode);
-
- emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
- if (!vcond_mve)
- {
- rtx zero = gen_reg_rtx (cmp_result_mode);
- rtx one = gen_reg_rtx (cmp_result_mode);
- emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
- emit_move_insn (one, CONST1_RTX (cmp_result_mode));
- emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
- }
- }
+ emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
+ op0, force_reg (cmp_mode, op1)));
else
emit_insn (gen_neon_vc (code, cmp_mode, target,
op0, force_reg (cmp_mode, op1)));
case LEU:
case LTU:
if (TARGET_HAVE_MVE)
- {
- rtx vpr_p0;
- if (vcond_mve)
- vpr_p0 = target;
- else
- vpr_p0 = gen_reg_rtx (HImode);
-
- emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0));
- if (!vcond_mve)
- {
- rtx zero = gen_reg_rtx (cmp_result_mode);
- rtx one = gen_reg_rtx (cmp_result_mode);
- emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
- emit_move_insn (one, CONST1_RTX (cmp_result_mode));
- emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
- }
- }
+ emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
+ force_reg (cmp_mode, op1), op0));
else
emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
target, force_reg (cmp_mode, op1), op0));
rtx gt_res = gen_reg_rtx (cmp_result_mode);
rtx alt_res = gen_reg_rtx (cmp_result_mode);
rtx_code alt_code = (code == LTGT ? LT : LE);
- if (arm_expand_vector_compare (gt_res, GT, op0, op1, true, vcond_mve)
- || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true, vcond_mve))
+ if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
+ || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
gcc_unreachable ();
emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
gt_res, alt_res)));
{
- /* When expanding for MVE, we do not want to emit a (useless) vpsel in
- arm_expand_vector_compare, and another one here. */
- bool vcond_mve=false;
rtx mask;
if (TARGET_HAVE_MVE)
- {
- vcond_mve=true;
- mask = gen_reg_rtx (HImode);
- }
+ mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
else
mask = gen_reg_rtx (cmp_result_mode);
bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
- operands[4], operands[5], true, vcond_mve);
+ operands[4], operands[5], true);
if (inverted)
std::swap (operands[1], operands[2]);
if (TARGET_NEON)
mask, operands[1], operands[2]));
else
{
- machine_mode cmp_mode = GET_MODE (operands[4]);
- rtx vpr_p0 = mask;
- rtx zero = gen_reg_rtx (cmp_mode);
- rtx one = gen_reg_rtx (cmp_mode);
- emit_move_insn (zero, CONST0_RTX (cmp_mode));
- emit_move_insn (one, CONST1_RTX (cmp_mode));
+ machine_mode cmp_mode = GET_MODE (operands[0]);
+
switch (GET_MODE_CLASS (cmp_mode))
{
case MODE_VECTOR_INT:
- emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, operands[0], one, zero, vpr_p0));
+ emit_insn (gen_mve_q (VPSELQ_S, VPSELQ_S, cmp_mode, operands[0],
+ operands[1], operands[2], mask));
break;
case MODE_VECTOR_FLOAT:
if (TARGET_HAVE_MVE_FLOAT)
- emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, zero, vpr_p0));
+ emit_insn (gen_mve_q_f (VPSELQ_F, cmp_mode, operands[0],
+ operands[1], operands[2], mask));
+ else
+ gcc_unreachable ();
break;
default:
gcc_unreachable ();
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
static bool
-arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
+arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
+ rtx target, rtx op0, rtx op1,
const vec_perm_indices &sel)
{
+ if (vmode != op_mode)
+ return false;
+
struct expand_vec_perm_d d;
int i, nelt, which;
return current_tune->fusible_ops & op;
}
+/* Return TRUE if the return address signing mechanism is enabled. */
+bool
+arm_current_function_pac_enabled_p (void)
+{
+ return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
+ || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
+ && !crtl->is_leaf));
+}
+
+/* Raise an error if the current target arch is not BTI compatible. */
+void
+aarch_bti_arch_check (void)
+{
+ if (!arm_arch8m_main)
+ error ("this architecture does not support branch protection "
+ "instructions");
+}
+
+/* Return TRUE if Branch Target Identification Mechanism is enabled. */
+bool
+aarch_bti_enabled (void)
+{
+ return aarch_enable_bti != 0;
+}
+
+/* Check if INSN is a BTI J insn. */
+bool
+aarch_bti_j_insn_p (rtx_insn *insn)
+{
+ if (!insn || !INSN_P (insn))
+ return false;
+
+ rtx pat = PATTERN (insn);
+ return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
+}
+
+/* Check if X is a PAC or PACBTI instruction. */
+bool
+aarch_pac_insn_p (rtx x)
+{
+ if (!x || !INSN_P (x))
+ return false;
+
+ rtx pat = PATTERN (x);
+
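+ /* PAC insns are represented as a SET whose source is either a plain
+ UNSPEC_PAC_NOP unspec or a volatile VUNSPEC_PACBTI_NOP unspec. */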
+ if (GET_CODE (pat) == SET)
+ {
+ rtx tmp = XEXP (pat, 1);
+ if (tmp
+ && ((GET_CODE (tmp) == UNSPEC
+ && XINT (tmp, 1) == UNSPEC_PAC_NOP)
+ || (GET_CODE (tmp) == UNSPEC_VOLATILE
+ && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
+ return true;
+ }
+
+ return false;
+}
+
+/* Target-specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
+ For Arm, both of these map to a simple BTI instruction. */
+
+rtx
+aarch_gen_bti_c (void)
+{
+ return gen_bti_nop ();
+}
+
+rtx
+aarch_gen_bti_j (void)
+{
+ return gen_bti_nop ();
+}
+
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
scheduled for speculative execution. Reject the long-running division
and square-root instructions. */
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */
+/* Implement TARGET_STACK_PROTECT_GUARD. Use the default for a guard based
+ on a global variable; otherwise return a null tree. */
+static tree
+arm_stack_protect_guard (void)
+{
+ if (arm_stack_protector_guard == SSP_GLOBAL)
+ return default_stack_protect_guard ();
+
+ return NULL_TREE;
+}
+
/* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
Unlike the arm version, we do NOT implement asm flag outputs. */
rtx_insn *
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
+ vec<const char *> &constraints,
+ vec<rtx> &, vec<rtx> & /*clobbers*/,
HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
for (unsigned i = 0, n = outputs.length (); i < n; ++i)
struct gcc_target targetm = TARGET_INITIALIZER;
+/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
+
+opt_machine_mode
+arm_get_mask_mode (machine_mode mode)
+{
+ if (TARGET_HAVE_MVE)
+ return arm_mode_to_pred_mode (mode);
+
+ return default_get_mask_mode (mode);
+}
+
+/* Output assembly to read the thread pointer from the appropriate TPIDR
+ register into DEST. If PRED_P, also emit the %? that is used to output
+ the predication code. */
+
+const char *
+arm_output_load_tpidr (rtx dst, bool pred_p)
+{
+ char buf[64];
+ int tpidr_coproc_num = -1;
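+ /* All three thread-pointer registers live in CP15 c13 and differ only in
+ the final operand of the MRC encoding: 2 for TPIDRURW, 3 for TPIDRURO
+ and 4 for TPIDRPRW. */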
+ switch (target_thread_pointer)
+ {
+ case TP_TPIDRURW:
+ tpidr_coproc_num = 2;
+ break;
+ case TP_TPIDRURO:
+ tpidr_coproc_num = 3;
+ break;
+ case TP_TPIDRPRW:
+ tpidr_coproc_num = 4;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ snprintf (buf, sizeof (buf),
+ "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
+ pred_p ? "%?" : "", tpidr_coproc_num);
+ output_asm_insn (buf, &dst);
+ return "";
+}
+
#include "gt-arm.h"