/* Machine description for AArch64 architecture.
- Copyright (C) 2009-2019 Free Software Foundation, Inc.
+ Copyright (C) 2009-2020 Free Software Foundation, Inc.
Contributed by ARM Ltd.
This file is part of GCC.
1, /* vec_int_stmt_cost */
1, /* vec_fp_stmt_cost */
2, /* vec_permute_cost */
- 1, /* vec_to_scalar_cost */
+ 2, /* vec_to_scalar_cost */
1, /* scalar_to_vec_cost */
1, /* vec_align_load_cost */
1, /* vec_unalign_load_cost */
SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
2, /* issue_rate */
- AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
+ AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
"8", /* function_align. */
"8", /* jump_align. */
"8", /* loop_align. */
SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
2, /* issue_rate */
- AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
+ AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
"8", /* function_align. */
"8", /* jump_align. */
"8", /* loop_align. */
SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
4, /* issue_rate */
- (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
- | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
"16", /* function_align. */
"4", /* jump_align. */
"8", /* loop_align. */
SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost. */
4, /* issue_rate. */
- (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
- | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
+ (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
"16", /* function_align. */
"8", /* jump_align. */
"16", /* loop_align. */
" vector types", "+nofp");
}
+/* Report when we try to do something that requires SVE but SVE is disabled.
+ This is an error of last resort and isn't very high-quality. It usually
+ involves attempts to measure the vector length in some way. */
+static void
+aarch64_report_sve_required (void)
+{
+ static bool reported_p = false;
+
+ /* Avoid reporting a slew of messages for a single oversight. */
+ if (reported_p)
+ return;
+
+ error ("this operation requires the SVE ISA extension");
+ inform (input_location, "you can enable SVE using the command-line"
+ " option %<-march%>, or by using the %<target%>"
+ " attribute or pragma");
+ reported_p = true;
+}
+
/* Return true if REGNO is P0-P15 or one of the special FFR-related
registers. */
inline bool
if (GP_REGNUM_P (regno))
{
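+      /* SVE vector and predicate modes never belong in general
+         registers.  */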
+ if (vec_flags & VEC_ANY_SVE)
+ return false;
if (known_le (GET_MODE_SIZE (mode), 8))
return true;
- else if (known_le (GET_MODE_SIZE (mode), 16))
+ if (known_le (GET_MODE_SIZE (mode), 16))
return (regno & 1) == 0;
}
else if (FP_REGNUM_P (regno))
folding it into the relocation. */
if (!offset.is_constant (&const_offset))
{
+ if (!TARGET_SVE)
+ {
+ aarch64_report_sve_required ();
+ return;
+ }
if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
emit_insn (gen_rtx_SET (dest, imm));
else
return true;
}
-/* Implement TARGET_FUNCTION_VALUE.
- Define how to find the value returned by a function. */
-
+/* Subroutine of aarch64_function_value.  MODE is the mode of the argument
+   after promotion, and after partial SVE modes have been replaced by
+   their integer equivalents.  */
static rtx
-aarch64_function_value (const_tree type, const_tree func,
- bool outgoing ATTRIBUTE_UNUSED)
+aarch64_function_value_1 (const_tree type, machine_mode mode)
{
- machine_mode mode;
- int unsignedp;
- int count;
- machine_mode ag_mode;
-
- mode = TYPE_MODE (type);
- if (INTEGRAL_TYPE_P (type))
- mode = promote_function_mode (type, mode, &unsignedp, func, 1);
-
unsigned int num_zr, num_pr;
if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr))
{
}
}
+ int count;
+ machine_mode ag_mode;
if (aarch64_vfp_is_call_or_return_candidate (mode, type,
&ag_mode, &count, NULL))
{
return gen_rtx_REG (mode, R0_REGNUM);
}
+/* Implement TARGET_FUNCTION_VALUE.
+ Define how to find the value returned by a function. */
+
+static rtx
+aarch64_function_value (const_tree type, const_tree func,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ machine_mode mode;
+ int unsignedp;
+
+ mode = TYPE_MODE (type);
+ if (INTEGRAL_TYPE_P (type))
+ mode = promote_function_mode (type, mode, &unsignedp, func, 1);
+
+ /* Vector types can acquire a partial SVE mode using things like
+ __attribute__((vector_size(N))), and this is potentially useful.
+ However, the choice of mode doesn't affect the type's ABI identity,
+ so we should treat the types as though they had the associated
+ integer mode, just like they did before SVE was introduced.
+
+ We know that the vector must be 128 bits or smaller, otherwise we'd
+ have returned it in memory instead. */
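+  /* For example (purely illustrative): with -msve-vector-bits=128, an
+     8-byte GNU vector of two ints might be given the partial SVE mode
+     VNx2SI, but it is still returned as though it had the 64-bit
+     integer mode DImode.  */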
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if ((vec_flags & VEC_ANY_SVE) && (vec_flags & VEC_PARTIAL))
+ {
+ scalar_int_mode int_mode = int_mode_for_mode (mode).require ();
+ rtx reg = aarch64_function_value_1 (type, int_mode);
+ /* Vector types are never returned in the MSB and are never split. */
+ gcc_assert (REG_P (reg) && GET_MODE (reg) == int_mode);
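+      /* The single-entry PARALLEL created below has the form
+           (parallel [(expr_list (reg:DI x0) (const_int 0))])
+         (illustrated here for a DImode value in x0) and says that the
+         whole value lives in the given register starting at byte
+         offset 0.  */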
+ rtx pair = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
+ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, pair));
+ }
+
+ return aarch64_function_value_1 (type, mode);
+}
+
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the
   values of a called function may come back. */
}
/* Layout a function argument according to the AAPCS64 rules. The rule
- numbers refer to the rule numbers in the AAPCS64. */
+ numbers refer to the rule numbers in the AAPCS64. ORIG_MODE is the
+ mode that was originally given to us by the target hook, whereas the
+ mode in ARG might be the result of replacing partial SVE modes with
+ the equivalent integer mode. */
static void
-aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
+aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg,
+ machine_mode orig_mode)
{
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
tree type = arg.type;
if (pcum->aapcs_arg_processed)
return;
+ /* Vector types can acquire a partial SVE mode using things like
+ __attribute__((vector_size(N))), and this is potentially useful.
+ However, the choice of mode doesn't affect the type's ABI identity,
+ so we should treat the types as though they had the associated
+ integer mode, just like they did before SVE was introduced.
+
+ We know that the vector must be 128 bits or smaller, otherwise we'd
+ have passed it by reference instead. */
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if ((vec_flags & VEC_ANY_SVE) && (vec_flags & VEC_PARTIAL))
+ {
+ function_arg_info tmp_arg = arg;
+ tmp_arg.mode = int_mode_for_mode (mode).require ();
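+      /* This recursive call terminates: tmp_arg.mode is now a scalar
+         integer mode, which is never classified as a partial SVE mode.  */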
+ aarch64_layout_arg (pcum_v, tmp_arg, orig_mode);
+ if (rtx reg = pcum->aapcs_reg)
+ {
+ gcc_assert (REG_P (reg) && GET_MODE (reg) == tmp_arg.mode);
+ rtx pair = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
+ pcum->aapcs_reg = gen_rtx_PARALLEL (mode, gen_rtvec (1, pair));
+ }
+ return;
+ }
+
pcum->aapcs_arg_processed = true;
unsigned int num_zr, num_pr;
comparison is there because for > 16 * BITS_PER_UNIT
alignment nregs should be > 2 and therefore it should be
passed by reference rather than value. */
- && (aarch64_function_arg_alignment (mode, type, &abi_break)
+ && (aarch64_function_arg_alignment (orig_mode, type, &abi_break)
== 16 * BITS_PER_UNIT))
{
if (abi_break && warn_psabi && currently_expanding_gimple_stmt)
on_stack:
pcum->aapcs_stack_words = size / UNITS_PER_WORD;
- if (aarch64_function_arg_alignment (mode, type, &abi_break)
+ if (aarch64_function_arg_alignment (orig_mode, type, &abi_break)
== 16 * BITS_PER_UNIT)
{
int new_size = ROUND_UP (pcum->aapcs_stack_size, 16 / UNITS_PER_WORD);
if (arg.end_marker_p ())
return gen_int_mode (pcum->pcs_variant, DImode);
- aarch64_layout_arg (pcum_v, arg);
+ aarch64_layout_arg (pcum_v, arg, arg.mode);
return pcum->aapcs_reg;
}
|| pcum->pcs_variant == ARM_PCS_SIMD
|| pcum->pcs_variant == ARM_PCS_SVE)
{
- aarch64_layout_arg (pcum_v, arg);
+ aarch64_layout_arg (pcum_v, arg, arg.mode);
gcc_assert ((pcum->aapcs_reg != NULL_RTX)
!= (pcum->aapcs_stack_words != 0));
pcum->aapcs_arg_processed = false;
}
}
+/* Return true if STMT_INFO extends the result of a load. */
+static bool
+aarch64_extending_load_p (stmt_vec_info stmt_info)
+{
+ gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+ if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
+ return false;
+
+ tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
+ tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
+ tree rhs_type = TREE_TYPE (rhs);
+ if (!INTEGRAL_TYPE_P (lhs_type)
+ || !INTEGRAL_TYPE_P (rhs_type)
+ || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
+ return false;
+
+ stmt_vec_info def_stmt_info = stmt_info->vinfo->lookup_def (rhs);
+ return (def_stmt_info
+ && STMT_VINFO_DATA_REF (def_stmt_info)
+ && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
+}
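+
+/* For example, aarch64_extending_load_p holds for the conversion in:
+
+     short_var = *ptr;
+     int_var = (int) short_var;
+
+   because the operand being widened is itself the result of a load.  */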
+
+/* Return true if STMT_INFO is an integer truncation. */
+static bool
+aarch64_integer_truncation_p (stmt_vec_info stmt_info)
+{
+ gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+ if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
+ return false;
+
+ tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
+ tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
+ return (INTEGRAL_TYPE_P (lhs_type)
+ && INTEGRAL_TYPE_P (rhs_type)
+ && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
+}
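+
+/* Likewise, aarch64_integer_truncation_p holds for a conversion such as
+   "short_var = (short) int_var", where the target precision is strictly
+   narrower than the source precision.  */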
+
+/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
+ for STMT_INFO, which has cost kind KIND. Adjust the cost as necessary
+ for SVE targets. */
+static unsigned int
+aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+ unsigned int stmt_cost)
+{
+ /* Unlike vec_promote_demote, vector_stmt conversions do not change the
+ vector register size or number of units. Integer promotions of this
+ type therefore map to SXT[BHW] or UXT[BHW].
+
+ Most loads have extending forms that can do the sign or zero extension
+ on the fly. Optimistically assume that a load followed by an extension
+ will fold to this form during combine, and that the extension therefore
+ comes for free. */
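+
+  /* (For example, SVE provides sign-extending loads such as LD1SB and
+     LD1SH, and zero-extending loads such as LD1B and LD1H, which widen
+     each element to its container size as the data is loaded.)  */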
+ if (kind == vector_stmt && aarch64_extending_load_p (stmt_info))
+ stmt_cost = 0;
+
+ /* For similar reasons, vector_stmt integer truncations are a no-op,
+ because we can just ignore the unused upper bits of the source. */
+ if (kind == vector_stmt && aarch64_integer_truncation_p (stmt_info))
+ stmt_cost = 0;
+
+ return stmt_cost;
+}
+
/* Implement targetm.vectorize.add_stmt_cost. */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
int stmt_cost =
aarch64_builtin_vectorization_cost (kind, vectype, misalign);
+ if (stmt_info && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)))
+ stmt_cost = aarch64_sve_adjust_stmt_cost (kind, stmt_info, stmt_cost);
+
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
arbitrary and could potentially be improved with analysis. */
return NULL;
}
+/* Implement TARGET_VERIFY_TYPE_CONTEXT. */
+
+static bool
+aarch64_verify_type_context (location_t loc, type_context_kind context,
+ const_tree type, bool silent_p)
+{
+ return aarch64_sve::verify_type_context (loc, context, type, silent_p);
+}
+
/* Find the first rtx_insn before insn that will generate an assembly
instruction. */
}
}
- unsigned int elt_size = GET_MODE_SIZE (elt_mode);
+ /* If all elements in an SVE vector have the same value, we have a free
+ choice between using the element mode and using the container mode.
+ Using the element mode means that unused parts of the vector are
+ duplicates of the used elements, while using the container mode means
+ that the unused parts are an extension of the used elements. Using the
+ element mode is better for (say) VNx4HI 0x101, since 0x01010101 is valid
+ for its container mode VNx4SI while 0x00000101 isn't.
+
+ If not all elements in an SVE vector have the same value, we need the
+ transition from one element to the next to occur at container boundaries.
+ E.g. a fixed-length VNx4HI containing { 1, 2, 3, 4 } should be treated
+ in the same way as a VNx4SI containing { 1, 2, 3, 4 }. */
+ scalar_int_mode elt_int_mode;
+ if ((vec_flags & VEC_SVE_DATA) && n_elts > 1)
+ elt_int_mode = aarch64_sve_container_int_mode (mode);
+ else
+ elt_int_mode = int_mode_for_mode (elt_mode).require ();
+
+ unsigned int elt_size = GET_MODE_SIZE (elt_int_mode);
if (elt_size > 8)
return false;
- scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require ();
-
/* Expand the vector constant out into a byte vector, with the least
significant byte of the register first. */
auto_vec<unsigned char, 16> bytes;
if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
return true;
- if (aarch64_sve_cnt_immediate_p (x))
+ if (TARGET_SVE && aarch64_sve_cnt_immediate_p (x))
return true;
return aarch64_classify_symbolic_expression (x)
}
}
+ /* Fuse compare (CMP/CMN/TST/BICS) and conditional branch. */
if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
+ && prev_set && curr_set && any_condjump_p (curr)
+ && GET_CODE (SET_SRC (prev_set)) == COMPARE
+ && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0)))
+ && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
+ return true;
+
+ /* Fuse flag-setting ALU instructions and conditional branch. */
+ if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
&& any_condjump_p (curr))
{
unsigned int condreg1, condreg2;
}
}
+ /* Fuse ALU instructions and CBZ/CBNZ. */
if (prev_set
&& curr_set
- && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
+ && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ)
&& any_condjump_p (curr))
{
/* We're trying to match:
aarch64_can_change_mode_class (machine_mode from,
machine_mode to, reg_class_t)
{
+ unsigned int from_flags = aarch64_classify_vector_mode (from);
+ unsigned int to_flags = aarch64_classify_vector_mode (to);
+
+ bool from_sve_p = (from_flags & VEC_ANY_SVE);
+ bool to_sve_p = (to_flags & VEC_ANY_SVE);
+
+ bool from_partial_sve_p = from_sve_p && (from_flags & VEC_PARTIAL);
+ bool to_partial_sve_p = to_sve_p && (to_flags & VEC_PARTIAL);
+
+ /* Don't allow changes between partial SVE modes and other modes.
+ The contents of partial SVE modes are distributed evenly across
+ the register, whereas GCC expects them to be clustered together. */
+ if (from_partial_sve_p != to_partial_sve_p)
+ return false;
+
+ /* Similarly reject changes between partial SVE modes that have
+ different patterns of significant and insignificant bits. */
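+  /* For example, VNx2HI and VNx4HI are both partial SVE modes, but they
+     store their 16-bit elements in 64-bit and 32-bit containers
+     respectively, so their significant bits are in different places.  */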
+ if (from_partial_sve_p
+ && (aarch64_sve_container_bits (from) != aarch64_sve_container_bits (to)
+ || GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to)))
+ return false;
+
if (BYTES_BIG_ENDIAN)
{
- bool from_sve_p = aarch64_sve_data_mode_p (from);
- bool to_sve_p = aarch64_sve_data_mode_p (to);
-
/* Don't allow changes between SVE data modes and non-SVE modes.
See the comment at the head of aarch64-sve.md for details. */
if (from_sve_p != to_sve_p)
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type
+#undef TARGET_VERIFY_TYPE_CONTEXT
+#define TARGET_VERIFY_TYPE_CONTEXT aarch64_verify_type_context
+
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost