1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
75 #include "aarch-common.h"
76 #include "aarch-common-protos.h"
77
78 /* This file should be included last. */
79 #include "target-def.h"
80
81 /* Forward definitions of types. */
82 typedef struct minipool_node Mnode;
83 typedef struct minipool_fixup Mfix;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_insn_cost (rtx_insn *, bool);
185 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
186 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
187 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
188 static void emit_constant_insn (rtx cond, rtx pattern);
189 static rtx_insn *emit_set_insn (rtx, rtx);
190 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
191 static rtx emit_multi_reg_push (unsigned long, unsigned long);
192 static void arm_emit_multi_reg_pop (unsigned long);
193 static int vfp_emit_fstmd (int, int);
194 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
195 static int arm_arg_partial_bytes (cumulative_args_t,
196 const function_arg_info &);
197 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
198 static void arm_function_arg_advance (cumulative_args_t,
199 const function_arg_info &);
200 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
201 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
202 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
203 const_tree);
204 static rtx aapcs_libcall_value (machine_mode);
205 static int aapcs_select_return_coproc (const_tree, const_tree);
206
207 #ifdef OBJECT_FORMAT_ELF
208 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
209 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
210 #endif
211 #ifndef ARM_PE
212 static void arm_encode_section_info (tree, rtx, int);
213 #endif
214
215 static void arm_file_end (void);
216 static void arm_file_start (void);
217 static void arm_insert_attributes (tree, tree *);
218
219 static void arm_setup_incoming_varargs (cumulative_args_t,
220 const function_arg_info &, int *, int);
221 static bool arm_pass_by_reference (cumulative_args_t,
222 const function_arg_info &);
223 static bool arm_promote_prototypes (const_tree);
224 static bool arm_default_short_enums (void);
225 static bool arm_align_anon_bitfield (void);
226 static bool arm_return_in_msb (const_tree);
227 static bool arm_must_pass_in_stack (const function_arg_info &);
228 static bool arm_return_in_memory (const_tree, const_tree);
229 #if ARM_UNWIND_INFO
230 static void arm_unwind_emit (FILE *, rtx_insn *);
231 static bool arm_output_ttype (rtx);
232 static void arm_asm_emit_except_personality (rtx);
233 #endif
234 static void arm_asm_init_sections (void);
235 static rtx arm_dwarf_register_span (rtx);
236
237 static tree arm_cxx_guard_type (void);
238 static bool arm_cxx_guard_mask_bit (void);
239 static tree arm_get_cookie_size (tree);
240 static bool arm_cookie_has_size (void);
241 static bool arm_cxx_cdtor_returns_this (void);
242 static bool arm_cxx_key_method_may_be_inline (void);
243 static void arm_cxx_determine_class_data_visibility (tree);
244 static bool arm_cxx_class_data_always_comdat (void);
245 static bool arm_cxx_use_aeabi_atexit (void);
246 static void arm_init_libfuncs (void);
247 static tree arm_build_builtin_va_list (void);
248 static void arm_expand_builtin_va_start (tree, rtx);
249 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
250 static void arm_option_override (void);
251 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
252 struct cl_target_option *);
253 static void arm_override_options_after_change (void);
254 static void arm_option_print (FILE *, int, struct cl_target_option *);
255 static void arm_set_current_function (tree);
256 static bool arm_can_inline_p (tree, tree);
257 static void arm_relayout_function (tree);
258 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_sched_can_speculate_insn (rtx_insn *);
261 static bool arm_macro_fusion_p (void);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
265 static int arm_first_cycle_multipass_dfa_lookahead (void);
266 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
267 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
268 static bool arm_output_addr_const_extra (FILE *, rtx);
269 static bool arm_allocate_stack_slots_for_args (void);
270 static bool arm_warn_func_return (tree);
271 static tree arm_promoted_type (const_tree t);
272 static bool arm_scalar_mode_supported_p (scalar_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (scalar_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
294 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
295 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
296 static int arm_default_branch_cost (bool, bool);
297 static int arm_cortex_a5_branch_cost (bool, bool);
298 static int arm_cortex_m_branch_cost (bool, bool);
299 static int arm_cortex_m7_branch_cost (bool, bool);
300
301 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
302 rtx, const vec_perm_indices &);
303
304 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
305
306 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
307 tree vectype,
308 int misalign ATTRIBUTE_UNUSED);
309
310 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
311 bool op0_preserve_value);
312 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
313
314 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
315 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
316 const_tree);
317 static section *arm_function_section (tree, enum node_frequency, bool, bool);
318 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
319 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
320 int reloc);
321 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
322 static opt_scalar_float_mode arm_floatn_mode (int, bool);
323 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
324 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
325 static bool arm_modes_tieable_p (machine_mode, machine_mode);
326 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
327 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
328 vec<machine_mode> &,
329 vec<const char *> &, vec<rtx> &,
330 HARD_REG_SET &, location_t);
331 static const char *arm_identify_fpu_from_isa (sbitmap);
332 \f
333 /* Table of machine attributes. */
334 static const struct attribute_spec arm_attribute_table[] =
335 {
336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337 affects_type_identity, handler, exclude } */
338 /* Function calls made to this symbol must be done indirectly, because
339 it may lie outside of the 26 bit addressing range of a normal function
340 call. */
341 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
342 /* Whereas these functions are always known to reside within the 26 bit
343 addressing range. */
344 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
345 /* Specify the procedure call conventions for a function. */
346 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
347 NULL },
348 /* Interrupt Service Routines have special prologue and epilogue requirements. */
349 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
352 NULL },
353 { "naked", 0, 0, true, false, false, false,
354 arm_handle_fndecl_attribute, NULL },
355 #ifdef ARM_PE
356 /* ARM/PE has three new attributes:
357 interfacearm - ?
358 dllexport - for exporting a function/variable that will live in a dll
359 dllimport - for importing a function/variable from a dll
360
361 Microsoft allows multiple declspecs in one __declspec, separating
362 them with spaces. We do NOT support this. Instead, use __declspec
363 multiple times.
364 */
365 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
366 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
367 { "interfacearm", 0, 0, true, false, false, false,
368 arm_handle_fndecl_attribute, NULL },
369 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
370 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
373 NULL },
374 { "notshared", 0, 0, false, true, false, false,
375 arm_handle_notshared_attribute, NULL },
376 #endif
377 /* ARMv8-M Security Extensions support. */
378 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
379 arm_handle_cmse_nonsecure_entry, NULL },
380 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
381 arm_handle_cmse_nonsecure_call, NULL },
382 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
383 { NULL, 0, 0, false, false, false, false, NULL, NULL }
384 };
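/* For example, a user would request the indirect calling convention described
   above for a particular function with:
       extern void far_func (void) __attribute__ ((long_call));  */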
385 \f
386 /* Initialize the GCC target structure. */
387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
388 #undef TARGET_MERGE_DECL_ATTRIBUTES
389 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
390 #endif
391
392 #undef TARGET_CHECK_BUILTIN_CALL
393 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
394
395 #undef TARGET_LEGITIMIZE_ADDRESS
396 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
397
398 #undef TARGET_ATTRIBUTE_TABLE
399 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
400
401 #undef TARGET_INSERT_ATTRIBUTES
402 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
403
404 #undef TARGET_ASM_FILE_START
405 #define TARGET_ASM_FILE_START arm_file_start
406 #undef TARGET_ASM_FILE_END
407 #define TARGET_ASM_FILE_END arm_file_end
408
409 #undef TARGET_ASM_ALIGNED_SI_OP
410 #define TARGET_ASM_ALIGNED_SI_OP NULL
411 #undef TARGET_ASM_INTEGER
412 #define TARGET_ASM_INTEGER arm_assemble_integer
413
414 #undef TARGET_PRINT_OPERAND
415 #define TARGET_PRINT_OPERAND arm_print_operand
416 #undef TARGET_PRINT_OPERAND_ADDRESS
417 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
418 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
419 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
420
421 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
422 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
423
424 #undef TARGET_ASM_FUNCTION_PROLOGUE
425 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
426
427 #undef TARGET_ASM_FUNCTION_EPILOGUE
428 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
429
430 #undef TARGET_CAN_INLINE_P
431 #define TARGET_CAN_INLINE_P arm_can_inline_p
432
433 #undef TARGET_RELAYOUT_FUNCTION
434 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
435
436 #undef TARGET_OPTION_OVERRIDE
437 #define TARGET_OPTION_OVERRIDE arm_option_override
438
439 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
440 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
441
442 #undef TARGET_OPTION_RESTORE
443 #define TARGET_OPTION_RESTORE arm_option_restore
444
445 #undef TARGET_OPTION_PRINT
446 #define TARGET_OPTION_PRINT arm_option_print
447
448 #undef TARGET_COMP_TYPE_ATTRIBUTES
449 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
450
451 #undef TARGET_SCHED_CAN_SPECULATE_INSN
452 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
453
454 #undef TARGET_SCHED_MACRO_FUSION_P
455 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
456
457 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
458 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
459
460 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
461 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
462
463 #undef TARGET_SCHED_ADJUST_COST
464 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
465
466 #undef TARGET_SET_CURRENT_FUNCTION
467 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
468
469 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
470 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
471
472 #undef TARGET_SCHED_REORDER
473 #define TARGET_SCHED_REORDER arm_sched_reorder
474
475 #undef TARGET_REGISTER_MOVE_COST
476 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
477
478 #undef TARGET_MEMORY_MOVE_COST
479 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
480
481 #undef TARGET_ENCODE_SECTION_INFO
482 #ifdef ARM_PE
483 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
484 #else
485 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
486 #endif
487
488 #undef TARGET_STRIP_NAME_ENCODING
489 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
490
491 #undef TARGET_ASM_INTERNAL_LABEL
492 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
493
494 #undef TARGET_FLOATN_MODE
495 #define TARGET_FLOATN_MODE arm_floatn_mode
496
497 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
498 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
499
500 #undef TARGET_FUNCTION_VALUE
501 #define TARGET_FUNCTION_VALUE arm_function_value
502
503 #undef TARGET_LIBCALL_VALUE
504 #define TARGET_LIBCALL_VALUE arm_libcall_value
505
506 #undef TARGET_FUNCTION_VALUE_REGNO_P
507 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
508
509 #undef TARGET_ASM_OUTPUT_MI_THUNK
510 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
511 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
512 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
513
514 #undef TARGET_RTX_COSTS
515 #define TARGET_RTX_COSTS arm_rtx_costs
516 #undef TARGET_ADDRESS_COST
517 #define TARGET_ADDRESS_COST arm_address_cost
518 #undef TARGET_INSN_COST
519 #define TARGET_INSN_COST arm_insn_cost
520
521 #undef TARGET_SHIFT_TRUNCATION_MASK
522 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
523 #undef TARGET_VECTOR_MODE_SUPPORTED_P
524 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
525 #undef TARGET_ARRAY_MODE_SUPPORTED_P
526 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
527 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
528 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
529 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
530 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
531 arm_autovectorize_vector_modes
532
533 #undef TARGET_MACHINE_DEPENDENT_REORG
534 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
535
536 #undef TARGET_INIT_BUILTINS
537 #define TARGET_INIT_BUILTINS arm_init_builtins
538 #undef TARGET_EXPAND_BUILTIN
539 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
540 #undef TARGET_BUILTIN_DECL
541 #define TARGET_BUILTIN_DECL arm_builtin_decl
542
543 #undef TARGET_INIT_LIBFUNCS
544 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
545
546 #undef TARGET_PROMOTE_FUNCTION_MODE
547 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
548 #undef TARGET_PROMOTE_PROTOTYPES
549 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
550 #undef TARGET_PASS_BY_REFERENCE
551 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
552 #undef TARGET_ARG_PARTIAL_BYTES
553 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
554 #undef TARGET_FUNCTION_ARG
555 #define TARGET_FUNCTION_ARG arm_function_arg
556 #undef TARGET_FUNCTION_ARG_ADVANCE
557 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
558 #undef TARGET_FUNCTION_ARG_PADDING
559 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
560 #undef TARGET_FUNCTION_ARG_BOUNDARY
561 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
562
563 #undef TARGET_SETUP_INCOMING_VARARGS
564 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
565
566 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
567 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
568
569 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
570 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
571 #undef TARGET_TRAMPOLINE_INIT
572 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
573 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
574 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
575
576 #undef TARGET_WARN_FUNC_RETURN
577 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
578
579 #undef TARGET_DEFAULT_SHORT_ENUMS
580 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
581
582 #undef TARGET_ALIGN_ANON_BITFIELD
583 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
584
585 #undef TARGET_NARROW_VOLATILE_BITFIELD
586 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
587
588 #undef TARGET_CXX_GUARD_TYPE
589 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
590
591 #undef TARGET_CXX_GUARD_MASK_BIT
592 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
593
594 #undef TARGET_CXX_GET_COOKIE_SIZE
595 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
596
597 #undef TARGET_CXX_COOKIE_HAS_SIZE
598 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
599
600 #undef TARGET_CXX_CDTOR_RETURNS_THIS
601 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
602
603 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
604 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
605
606 #undef TARGET_CXX_USE_AEABI_ATEXIT
607 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
608
609 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
610 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
611 arm_cxx_determine_class_data_visibility
612
613 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
614 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
615
616 #undef TARGET_RETURN_IN_MSB
617 #define TARGET_RETURN_IN_MSB arm_return_in_msb
618
619 #undef TARGET_RETURN_IN_MEMORY
620 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
621
622 #undef TARGET_MUST_PASS_IN_STACK
623 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
624
625 #if ARM_UNWIND_INFO
626 #undef TARGET_ASM_UNWIND_EMIT
627 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
628
629 /* EABI unwinding tables use a different format for the typeinfo tables. */
630 #undef TARGET_ASM_TTYPE
631 #define TARGET_ASM_TTYPE arm_output_ttype
632
633 #undef TARGET_ARM_EABI_UNWINDER
634 #define TARGET_ARM_EABI_UNWINDER true
635
636 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
637 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
638
639 #endif /* ARM_UNWIND_INFO */
640
641 #undef TARGET_ASM_INIT_SECTIONS
642 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
643
644 #undef TARGET_DWARF_REGISTER_SPAN
645 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
646
647 #undef TARGET_CANNOT_COPY_INSN_P
648 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
649
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_HAVE_TLS
652 #define TARGET_HAVE_TLS true
653 #endif
654
655 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
656 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
657
658 #undef TARGET_LEGITIMATE_CONSTANT_P
659 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
660
661 #undef TARGET_CANNOT_FORCE_CONST_MEM
662 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
663
664 #undef TARGET_MAX_ANCHOR_OFFSET
665 #define TARGET_MAX_ANCHOR_OFFSET 4095
666
667 /* The minimum is set such that the total size of the block
668 for a particular anchor is -4088 + 1 + 4095 bytes, which is
669 divisible by eight, ensuring natural spacing of anchors. */
670 #undef TARGET_MIN_ANCHOR_OFFSET
671 #define TARGET_MIN_ANCHOR_OFFSET -4088
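/* That is, each anchor covers offsets in [-4088, 4095], a span of
   4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of eight.  */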
672
673 #undef TARGET_SCHED_ISSUE_RATE
674 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
675
676 #undef TARGET_SCHED_VARIABLE_ISSUE
677 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
678
679 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
680 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
681 arm_first_cycle_multipass_dfa_lookahead
682
683 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
684 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
685 arm_first_cycle_multipass_dfa_lookahead_guard
686
687 #undef TARGET_MANGLE_TYPE
688 #define TARGET_MANGLE_TYPE arm_mangle_type
689
690 #undef TARGET_INVALID_CONVERSION
691 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
692
693 #undef TARGET_INVALID_UNARY_OP
694 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
695
696 #undef TARGET_INVALID_BINARY_OP
697 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
698
699 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
700 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
701
702 #undef TARGET_BUILD_BUILTIN_VA_LIST
703 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
704 #undef TARGET_EXPAND_BUILTIN_VA_START
705 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
706 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
707 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
708
709 #ifdef HAVE_AS_TLS
710 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
711 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
712 #endif
713
714 #undef TARGET_LEGITIMATE_ADDRESS_P
715 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
716
717 #undef TARGET_PREFERRED_RELOAD_CLASS
718 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
719
720 #undef TARGET_PROMOTED_TYPE
721 #define TARGET_PROMOTED_TYPE arm_promoted_type
722
723 #undef TARGET_SCALAR_MODE_SUPPORTED_P
724 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
725
726 #undef TARGET_COMPUTE_FRAME_LAYOUT
727 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
728
729 #undef TARGET_FRAME_POINTER_REQUIRED
730 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
731
732 #undef TARGET_CAN_ELIMINATE
733 #define TARGET_CAN_ELIMINATE arm_can_eliminate
734
735 #undef TARGET_CONDITIONAL_REGISTER_USAGE
736 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
737
738 #undef TARGET_CLASS_LIKELY_SPILLED_P
739 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
740
741 #undef TARGET_VECTORIZE_BUILTINS
742 #define TARGET_VECTORIZE_BUILTINS
743
744 #undef TARGET_VECTOR_ALIGNMENT
745 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
746
747 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
748 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
749 arm_vector_alignment_reachable
750
751 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
752 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
753 arm_builtin_support_vector_misalignment
754
755 #undef TARGET_PREFERRED_RENAME_CLASS
756 #define TARGET_PREFERRED_RENAME_CLASS \
757 arm_preferred_rename_class
758
759 #undef TARGET_VECTORIZE_VEC_PERM_CONST
760 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
761
762 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
763 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
764 arm_builtin_vectorization_cost
765
766 #undef TARGET_CANONICALIZE_COMPARISON
767 #define TARGET_CANONICALIZE_COMPARISON \
768 arm_canonicalize_comparison
769
770 #undef TARGET_ASAN_SHADOW_OFFSET
771 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
772
773 #undef MAX_INSN_PER_IT_BLOCK
774 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
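/* Under the ARMv8 "restricted IT" rules only a single conditional instruction
   may sit inside an IT block; otherwise up to four instructions may be
   predicated by one IT.  */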
775
776 #undef TARGET_CAN_USE_DOLOOP_P
777 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
778
779 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
780 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
781
782 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
783 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
784
785 #undef TARGET_SCHED_FUSION_PRIORITY
786 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
787
788 #undef TARGET_ASM_FUNCTION_SECTION
789 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
790
791 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
792 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
793
794 #undef TARGET_SECTION_TYPE_FLAGS
795 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
796
797 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
798 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
799
800 #undef TARGET_C_EXCESS_PRECISION
801 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
802
803 /* Although the architecture reserves bits 0 and 1, only the former is
804 used for ARM/Thumb ISA selection in v7 and earlier versions. */
805 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
806 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
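/* The value 2 above presumably means that bit 1 is the bit used to mark a
   custom function descriptor, since bit 0 is already taken by ARM/Thumb
   selection as noted in the comment.  */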
807
808 #undef TARGET_FIXED_CONDITION_CODE_REGS
809 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
810
811 #undef TARGET_HARD_REGNO_NREGS
812 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
813 #undef TARGET_HARD_REGNO_MODE_OK
814 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
815
816 #undef TARGET_MODES_TIEABLE_P
817 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
818
819 #undef TARGET_CAN_CHANGE_MODE_CLASS
820 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
821
822 #undef TARGET_CONSTANT_ALIGNMENT
823 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
824
825 #undef TARGET_INVALID_WITHIN_DOLOOP
826 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
827
828 #undef TARGET_MD_ASM_ADJUST
829 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
830
831 #undef TARGET_STACK_PROTECT_GUARD
832 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
833
834 #undef TARGET_VECTORIZE_GET_MASK_MODE
835 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
836 \f
837 /* Obstack for minipool constant handling. */
838 static struct obstack minipool_obstack;
839 static char * minipool_startobj;
840
841 /* The maximum number of insns skipped which
842 will be conditionalised if possible. */
843 static int max_insns_skipped = 5;
844
845 /* True if we are currently building a constant table. */
846 int making_const_table;
847
848 /* The processor for which instructions should be scheduled. */
849 enum processor_type arm_tune = TARGET_CPU_arm_none;
850
851 /* The current tuning set. */
852 const struct tune_params *current_tune;
853
854 /* Which floating point hardware to schedule for. */
855 int arm_fpu_attr;
856
857 /* Used for Thumb call_via trampolines. */
858 rtx thumb_call_via_label[14];
859 static int thumb_call_reg_needed;
860
861 /* The bits in this mask specify which instruction scheduling options should
862 be used. */
863 unsigned int tune_flags = 0;
864
865 /* The highest ARM architecture version supported by the
866 target. */
867 enum base_architecture arm_base_arch = BASE_ARCH_0;
868
869 /* Active target architecture and tuning. */
870
871 struct arm_build_target arm_active_target;
872
873 /* The following are used in the arm.md file as equivalents to bits
874 in the above two flag variables. */
875
876 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
877 int arm_arch4 = 0;
878
879 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
880 int arm_arch4t = 0;
881
882 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
883 int arm_arch5t = 0;
884
885 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
886 int arm_arch5te = 0;
887
888 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
889 int arm_arch6 = 0;
890
891 /* Nonzero if this chip supports the ARM 6K extensions. */
892 int arm_arch6k = 0;
893
894 /* Nonzero if this chip supports the ARM 6KZ extensions. */
895 int arm_arch6kz = 0;
896
897 /* Nonzero if instructions present in ARMv6-M can be used. */
898 int arm_arch6m = 0;
899
900 /* Nonzero if this chip supports the ARM 7 extensions. */
901 int arm_arch7 = 0;
902
903 /* Nonzero if this chip supports the Large Physical Address Extension. */
904 int arm_arch_lpae = 0;
905
906 /* Nonzero if instructions not present in the 'M' profile can be used. */
907 int arm_arch_notm = 0;
908
909 /* Nonzero if instructions present in ARMv7E-M can be used. */
910 int arm_arch7em = 0;
911
912 /* Nonzero if instructions present in ARMv8 can be used. */
913 int arm_arch8 = 0;
914
915 /* Nonzero if this chip supports the ARMv8.1 extensions. */
916 int arm_arch8_1 = 0;
917
918 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
919 int arm_arch8_2 = 0;
920
921 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
922 int arm_arch8_3 = 0;
923
924 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
925 int arm_arch8_4 = 0;
926
927 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
928 extensions. */
929 int arm_arch8m_main = 0;
930
931 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
932 extensions. */
933 int arm_arch8_1m_main = 0;
934
935 /* Nonzero if this chip supports the FP16 instructions extension of ARM
936 Architecture 8.2. */
937 int arm_fp16_inst = 0;
938
939 /* Nonzero if this chip can benefit from load scheduling. */
940 int arm_ld_sched = 0;
941
942 /* Nonzero if this chip is a StrongARM. */
943 int arm_tune_strongarm = 0;
944
945 /* Nonzero if this chip supports Intel Wireless MMX technology. */
946 int arm_arch_iwmmxt = 0;
947
948 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
949 int arm_arch_iwmmxt2 = 0;
950
951 /* Nonzero if this chip is an XScale. */
952 int arm_arch_xscale = 0;
953
 954 /* Nonzero if tuning for XScale. */
955 int arm_tune_xscale = 0;
956
957 /* Nonzero if we want to tune for stores that access the write-buffer.
958 This typically means an ARM6 or ARM7 with MMU or MPU. */
959 int arm_tune_wbuf = 0;
960
961 /* Nonzero if tuning for Cortex-A9. */
962 int arm_tune_cortex_a9 = 0;
963
964 /* Nonzero if we should define __THUMB_INTERWORK__ in the
965 preprocessor.
966 XXX This is a bit of a hack, it's intended to help work around
967 problems in GLD which doesn't understand that armv5t code is
968 interworking clean. */
969 int arm_cpp_interwork = 0;
970
971 /* Nonzero if chip supports Thumb 1. */
972 int arm_arch_thumb1;
973
974 /* Nonzero if chip supports Thumb 2. */
975 int arm_arch_thumb2;
976
977 /* Nonzero if chip supports integer division instruction. */
978 int arm_arch_arm_hwdiv;
979 int arm_arch_thumb_hwdiv;
980
981 /* Nonzero if chip disallows volatile memory access in IT block. */
982 int arm_arch_no_volatile_ce;
983
984 /* Nonzero if we shouldn't use literal pools. */
985 bool arm_disable_literal_pool = false;
986
987 /* The register number to be used for the PIC offset register. */
988 unsigned arm_pic_register = INVALID_REGNUM;
989
990 enum arm_pcs arm_pcs_default;
991
992 /* For an explanation of these variables, see final_prescan_insn below. */
993 int arm_ccfsm_state;
994 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
995 enum arm_cond_code arm_current_cc;
996
997 rtx arm_target_insn;
998 int arm_target_label;
999 /* The number of conditionally executed insns, including the current insn. */
1000 int arm_condexec_count = 0;
1001 /* A bitmask specifying the patterns for the IT block.
1002 Zero means do not output an IT block before this insn. */
1003 int arm_condexec_mask = 0;
1004 /* The number of bits used in arm_condexec_mask. */
1005 int arm_condexec_masklen = 0;
1006
1007 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1008 int arm_arch_crc = 0;
1009
1010 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1011 int arm_arch_dotprod = 0;
1012
1013 /* Nonzero if chip supports the ARMv8-M security extensions. */
1014 int arm_arch_cmse = 0;
1015
1016 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1017 int arm_m_profile_small_mul = 0;
1018
1019 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1020 int arm_arch_i8mm = 0;
1021
1022 /* Nonzero if chip supports the BFloat16 instructions. */
1023 int arm_arch_bf16 = 0;
1024
1025 /* Nonzero if chip supports the Custom Datapath Extension. */
1026 int arm_arch_cde = 0;
1027 int arm_arch_cde_coproc = 0;
1028 const int arm_arch_cde_coproc_bits[] = {
1029 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1030 };
1031
1032 /* The condition codes of the ARM, and the inverse function. */
1033 static const char * const arm_condition_codes[] =
1034 {
1035 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1036 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1037 };
1038
1039 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1040 int arm_regs_in_sequence[] =
1041 {
1042 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1043 };
1044
1045 #define DEF_FP_SYSREG(reg) #reg,
1046 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1047 FP_SYSREGS
1048 };
1049 #undef DEF_FP_SYSREG
1050
1051 #define ARM_LSL_NAME "lsl"
1052 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1053
1054 #define THUMB2_WORK_REGS \
1055 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1056 | (1 << SP_REGNUM) \
1057 | (1 << PC_REGNUM) \
1058 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1059 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1060 : 0)))
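/* A worked example (assuming THUMB_HARD_FRAME_POINTER_REGNUM is r7 and no PIC
   register is in use): SP and PC lie outside the low-register mask anyway, so
   this evaluates to 0xff & ~0x80 = 0x7f, i.e. r0-r6.  */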
1061 \f
1062 /* Initialization code. */
1063
1064 struct cpu_tune
1065 {
1066 enum processor_type scheduler;
1067 unsigned int tune_flags;
1068 const struct tune_params *tune;
1069 };
1070
1071 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1072 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1073 { \
1074 num_slots, \
1075 l1_size, \
1076 l1_line_size \
1077 }
1078
1079 /* arm generic vectorizer costs. */
1080 static const
1081 struct cpu_vec_costs arm_default_vec_cost = {
1082 1, /* scalar_stmt_cost. */
1083 1, /* scalar load_cost. */
1084 1, /* scalar_store_cost. */
1085 1, /* vec_stmt_cost. */
1086 1, /* vec_to_scalar_cost. */
1087 1, /* scalar_to_vec_cost. */
1088 1, /* vec_align_load_cost. */
1089 1, /* vec_unalign_load_cost. */
1090 1, /* vec_unalign_store_cost. */
1091 1, /* vec_store_cost. */
1092 3, /* cond_taken_branch_cost. */
1093 1, /* cond_not_taken_branch_cost. */
1094 };
1095
1096 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1097 #include "aarch-cost-tables.h"
1098
1099
1100
1101 const struct cpu_cost_table cortexa9_extra_costs =
1102 {
1103 /* ALU */
1104 {
1105 0, /* arith. */
1106 0, /* logical. */
1107 0, /* shift. */
1108 COSTS_N_INSNS (1), /* shift_reg. */
1109 COSTS_N_INSNS (1), /* arith_shift. */
1110 COSTS_N_INSNS (2), /* arith_shift_reg. */
1111 0, /* log_shift. */
1112 COSTS_N_INSNS (1), /* log_shift_reg. */
1113 COSTS_N_INSNS (1), /* extend. */
1114 COSTS_N_INSNS (2), /* extend_arith. */
1115 COSTS_N_INSNS (1), /* bfi. */
1116 COSTS_N_INSNS (1), /* bfx. */
1117 0, /* clz. */
1118 0, /* rev. */
1119 0, /* non_exec. */
1120 true /* non_exec_costs_exec. */
1121 },
1122 {
1123 /* MULT SImode */
1124 {
1125 COSTS_N_INSNS (3), /* simple. */
1126 COSTS_N_INSNS (3), /* flag_setting. */
1127 COSTS_N_INSNS (2), /* extend. */
1128 COSTS_N_INSNS (3), /* add. */
1129 COSTS_N_INSNS (2), /* extend_add. */
1130 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1131 },
1132 /* MULT DImode */
1133 {
1134 0, /* simple (N/A). */
1135 0, /* flag_setting (N/A). */
1136 COSTS_N_INSNS (4), /* extend. */
1137 0, /* add (N/A). */
1138 COSTS_N_INSNS (4), /* extend_add. */
1139 0 /* idiv (N/A). */
1140 }
1141 },
1142 /* LD/ST */
1143 {
1144 COSTS_N_INSNS (2), /* load. */
1145 COSTS_N_INSNS (2), /* load_sign_extend. */
1146 COSTS_N_INSNS (2), /* ldrd. */
1147 COSTS_N_INSNS (2), /* ldm_1st. */
1148 1, /* ldm_regs_per_insn_1st. */
1149 2, /* ldm_regs_per_insn_subsequent. */
1150 COSTS_N_INSNS (5), /* loadf. */
1151 COSTS_N_INSNS (5), /* loadd. */
1152 COSTS_N_INSNS (1), /* load_unaligned. */
1153 COSTS_N_INSNS (2), /* store. */
1154 COSTS_N_INSNS (2), /* strd. */
1155 COSTS_N_INSNS (2), /* stm_1st. */
1156 1, /* stm_regs_per_insn_1st. */
1157 2, /* stm_regs_per_insn_subsequent. */
1158 COSTS_N_INSNS (1), /* storef. */
1159 COSTS_N_INSNS (1), /* stored. */
1160 COSTS_N_INSNS (1), /* store_unaligned. */
1161 COSTS_N_INSNS (1), /* loadv. */
1162 COSTS_N_INSNS (1) /* storev. */
1163 },
1164 {
1165 /* FP SFmode */
1166 {
1167 COSTS_N_INSNS (14), /* div. */
1168 COSTS_N_INSNS (4), /* mult. */
1169 COSTS_N_INSNS (7), /* mult_addsub. */
1170 COSTS_N_INSNS (30), /* fma. */
1171 COSTS_N_INSNS (3), /* addsub. */
1172 COSTS_N_INSNS (1), /* fpconst. */
1173 COSTS_N_INSNS (1), /* neg. */
1174 COSTS_N_INSNS (3), /* compare. */
1175 COSTS_N_INSNS (3), /* widen. */
1176 COSTS_N_INSNS (3), /* narrow. */
1177 COSTS_N_INSNS (3), /* toint. */
1178 COSTS_N_INSNS (3), /* fromint. */
1179 COSTS_N_INSNS (3) /* roundint. */
1180 },
1181 /* FP DFmode */
1182 {
1183 COSTS_N_INSNS (24), /* div. */
1184 COSTS_N_INSNS (5), /* mult. */
1185 COSTS_N_INSNS (8), /* mult_addsub. */
1186 COSTS_N_INSNS (30), /* fma. */
1187 COSTS_N_INSNS (3), /* addsub. */
1188 COSTS_N_INSNS (1), /* fpconst. */
1189 COSTS_N_INSNS (1), /* neg. */
1190 COSTS_N_INSNS (3), /* compare. */
1191 COSTS_N_INSNS (3), /* widen. */
1192 COSTS_N_INSNS (3), /* narrow. */
1193 COSTS_N_INSNS (3), /* toint. */
1194 COSTS_N_INSNS (3), /* fromint. */
1195 COSTS_N_INSNS (3) /* roundint. */
1196 }
1197 },
1198 /* Vector */
1199 {
1200 COSTS_N_INSNS (1), /* alu. */
1201 COSTS_N_INSNS (4), /* mult. */
1202 COSTS_N_INSNS (1), /* movi. */
1203 COSTS_N_INSNS (2), /* dup. */
1204 COSTS_N_INSNS (2) /* extract. */
1205 }
1206 };
1207
1208 const struct cpu_cost_table cortexa8_extra_costs =
1209 {
1210 /* ALU */
1211 {
1212 0, /* arith. */
1213 0, /* logical. */
1214 COSTS_N_INSNS (1), /* shift. */
1215 0, /* shift_reg. */
1216 COSTS_N_INSNS (1), /* arith_shift. */
1217 0, /* arith_shift_reg. */
1218 COSTS_N_INSNS (1), /* log_shift. */
1219 0, /* log_shift_reg. */
1220 0, /* extend. */
1221 0, /* extend_arith. */
1222 0, /* bfi. */
1223 0, /* bfx. */
1224 0, /* clz. */
1225 0, /* rev. */
1226 0, /* non_exec. */
1227 true /* non_exec_costs_exec. */
1228 },
1229 {
1230 /* MULT SImode */
1231 {
1232 COSTS_N_INSNS (1), /* simple. */
1233 COSTS_N_INSNS (1), /* flag_setting. */
1234 COSTS_N_INSNS (1), /* extend. */
1235 COSTS_N_INSNS (1), /* add. */
1236 COSTS_N_INSNS (1), /* extend_add. */
1237 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1238 },
1239 /* MULT DImode */
1240 {
1241 0, /* simple (N/A). */
1242 0, /* flag_setting (N/A). */
1243 COSTS_N_INSNS (2), /* extend. */
1244 0, /* add (N/A). */
1245 COSTS_N_INSNS (2), /* extend_add. */
1246 0 /* idiv (N/A). */
1247 }
1248 },
1249 /* LD/ST */
1250 {
1251 COSTS_N_INSNS (1), /* load. */
1252 COSTS_N_INSNS (1), /* load_sign_extend. */
1253 COSTS_N_INSNS (1), /* ldrd. */
1254 COSTS_N_INSNS (1), /* ldm_1st. */
1255 1, /* ldm_regs_per_insn_1st. */
1256 2, /* ldm_regs_per_insn_subsequent. */
1257 COSTS_N_INSNS (1), /* loadf. */
1258 COSTS_N_INSNS (1), /* loadd. */
1259 COSTS_N_INSNS (1), /* load_unaligned. */
1260 COSTS_N_INSNS (1), /* store. */
1261 COSTS_N_INSNS (1), /* strd. */
1262 COSTS_N_INSNS (1), /* stm_1st. */
1263 1, /* stm_regs_per_insn_1st. */
1264 2, /* stm_regs_per_insn_subsequent. */
1265 COSTS_N_INSNS (1), /* storef. */
1266 COSTS_N_INSNS (1), /* stored. */
1267 COSTS_N_INSNS (1), /* store_unaligned. */
1268 COSTS_N_INSNS (1), /* loadv. */
1269 COSTS_N_INSNS (1) /* storev. */
1270 },
1271 {
1272 /* FP SFmode */
1273 {
1274 COSTS_N_INSNS (36), /* div. */
1275 COSTS_N_INSNS (11), /* mult. */
1276 COSTS_N_INSNS (20), /* mult_addsub. */
1277 COSTS_N_INSNS (30), /* fma. */
1278 COSTS_N_INSNS (9), /* addsub. */
1279 COSTS_N_INSNS (3), /* fpconst. */
1280 COSTS_N_INSNS (3), /* neg. */
1281 COSTS_N_INSNS (6), /* compare. */
1282 COSTS_N_INSNS (4), /* widen. */
1283 COSTS_N_INSNS (4), /* narrow. */
1284 COSTS_N_INSNS (8), /* toint. */
1285 COSTS_N_INSNS (8), /* fromint. */
1286 COSTS_N_INSNS (8) /* roundint. */
1287 },
1288 /* FP DFmode */
1289 {
1290 COSTS_N_INSNS (64), /* div. */
1291 COSTS_N_INSNS (16), /* mult. */
1292 COSTS_N_INSNS (25), /* mult_addsub. */
1293 COSTS_N_INSNS (30), /* fma. */
1294 COSTS_N_INSNS (9), /* addsub. */
1295 COSTS_N_INSNS (3), /* fpconst. */
1296 COSTS_N_INSNS (3), /* neg. */
1297 COSTS_N_INSNS (6), /* compare. */
1298 COSTS_N_INSNS (6), /* widen. */
1299 COSTS_N_INSNS (6), /* narrow. */
1300 COSTS_N_INSNS (8), /* toint. */
1301 COSTS_N_INSNS (8), /* fromint. */
1302 COSTS_N_INSNS (8) /* roundint. */
1303 }
1304 },
1305 /* Vector */
1306 {
1307 COSTS_N_INSNS (1), /* alu. */
1308 COSTS_N_INSNS (4), /* mult. */
1309 COSTS_N_INSNS (1), /* movi. */
1310 COSTS_N_INSNS (2), /* dup. */
1311 COSTS_N_INSNS (2) /* extract. */
1312 }
1313 };
1314
1315 const struct cpu_cost_table cortexa5_extra_costs =
1316 {
1317 /* ALU */
1318 {
1319 0, /* arith. */
1320 0, /* logical. */
1321 COSTS_N_INSNS (1), /* shift. */
1322 COSTS_N_INSNS (1), /* shift_reg. */
1323 COSTS_N_INSNS (1), /* arith_shift. */
1324 COSTS_N_INSNS (1), /* arith_shift_reg. */
1325 COSTS_N_INSNS (1), /* log_shift. */
1326 COSTS_N_INSNS (1), /* log_shift_reg. */
1327 COSTS_N_INSNS (1), /* extend. */
1328 COSTS_N_INSNS (1), /* extend_arith. */
1329 COSTS_N_INSNS (1), /* bfi. */
1330 COSTS_N_INSNS (1), /* bfx. */
1331 COSTS_N_INSNS (1), /* clz. */
1332 COSTS_N_INSNS (1), /* rev. */
1333 0, /* non_exec. */
1334 true /* non_exec_costs_exec. */
1335 },
1336
1337 {
1338 /* MULT SImode */
1339 {
1340 0, /* simple. */
1341 COSTS_N_INSNS (1), /* flag_setting. */
1342 COSTS_N_INSNS (1), /* extend. */
1343 COSTS_N_INSNS (1), /* add. */
1344 COSTS_N_INSNS (1), /* extend_add. */
1345 COSTS_N_INSNS (7) /* idiv. */
1346 },
1347 /* MULT DImode */
1348 {
1349 0, /* simple (N/A). */
1350 0, /* flag_setting (N/A). */
1351 COSTS_N_INSNS (1), /* extend. */
1352 0, /* add. */
1353 COSTS_N_INSNS (2), /* extend_add. */
1354 0 /* idiv (N/A). */
1355 }
1356 },
1357 /* LD/ST */
1358 {
1359 COSTS_N_INSNS (1), /* load. */
1360 COSTS_N_INSNS (1), /* load_sign_extend. */
1361 COSTS_N_INSNS (6), /* ldrd. */
1362 COSTS_N_INSNS (1), /* ldm_1st. */
1363 1, /* ldm_regs_per_insn_1st. */
1364 2, /* ldm_regs_per_insn_subsequent. */
1365 COSTS_N_INSNS (2), /* loadf. */
1366 COSTS_N_INSNS (4), /* loadd. */
1367 COSTS_N_INSNS (1), /* load_unaligned. */
1368 COSTS_N_INSNS (1), /* store. */
1369 COSTS_N_INSNS (3), /* strd. */
1370 COSTS_N_INSNS (1), /* stm_1st. */
1371 1, /* stm_regs_per_insn_1st. */
1372 2, /* stm_regs_per_insn_subsequent. */
1373 COSTS_N_INSNS (2), /* storef. */
1374 COSTS_N_INSNS (2), /* stored. */
1375 COSTS_N_INSNS (1), /* store_unaligned. */
1376 COSTS_N_INSNS (1), /* loadv. */
1377 COSTS_N_INSNS (1) /* storev. */
1378 },
1379 {
1380 /* FP SFmode */
1381 {
1382 COSTS_N_INSNS (15), /* div. */
1383 COSTS_N_INSNS (3), /* mult. */
1384 COSTS_N_INSNS (7), /* mult_addsub. */
1385 COSTS_N_INSNS (7), /* fma. */
1386 COSTS_N_INSNS (3), /* addsub. */
1387 COSTS_N_INSNS (3), /* fpconst. */
1388 COSTS_N_INSNS (3), /* neg. */
1389 COSTS_N_INSNS (3), /* compare. */
1390 COSTS_N_INSNS (3), /* widen. */
1391 COSTS_N_INSNS (3), /* narrow. */
1392 COSTS_N_INSNS (3), /* toint. */
1393 COSTS_N_INSNS (3), /* fromint. */
1394 COSTS_N_INSNS (3) /* roundint. */
1395 },
1396 /* FP DFmode */
1397 {
1398 COSTS_N_INSNS (30), /* div. */
1399 COSTS_N_INSNS (6), /* mult. */
1400 COSTS_N_INSNS (10), /* mult_addsub. */
1401 COSTS_N_INSNS (7), /* fma. */
1402 COSTS_N_INSNS (3), /* addsub. */
1403 COSTS_N_INSNS (3), /* fpconst. */
1404 COSTS_N_INSNS (3), /* neg. */
1405 COSTS_N_INSNS (3), /* compare. */
1406 COSTS_N_INSNS (3), /* widen. */
1407 COSTS_N_INSNS (3), /* narrow. */
1408 COSTS_N_INSNS (3), /* toint. */
1409 COSTS_N_INSNS (3), /* fromint. */
1410 COSTS_N_INSNS (3) /* roundint. */
1411 }
1412 },
1413 /* Vector */
1414 {
1415 COSTS_N_INSNS (1), /* alu. */
1416 COSTS_N_INSNS (4), /* mult. */
1417 COSTS_N_INSNS (1), /* movi. */
1418 COSTS_N_INSNS (2), /* dup. */
1419 COSTS_N_INSNS (2) /* extract. */
1420 }
1421 };
1422
1423
1424 const struct cpu_cost_table cortexa7_extra_costs =
1425 {
1426 /* ALU */
1427 {
1428 0, /* arith. */
1429 0, /* logical. */
1430 COSTS_N_INSNS (1), /* shift. */
1431 COSTS_N_INSNS (1), /* shift_reg. */
1432 COSTS_N_INSNS (1), /* arith_shift. */
1433 COSTS_N_INSNS (1), /* arith_shift_reg. */
1434 COSTS_N_INSNS (1), /* log_shift. */
1435 COSTS_N_INSNS (1), /* log_shift_reg. */
1436 COSTS_N_INSNS (1), /* extend. */
1437 COSTS_N_INSNS (1), /* extend_arith. */
1438 COSTS_N_INSNS (1), /* bfi. */
1439 COSTS_N_INSNS (1), /* bfx. */
1440 COSTS_N_INSNS (1), /* clz. */
1441 COSTS_N_INSNS (1), /* rev. */
1442 0, /* non_exec. */
1443 true /* non_exec_costs_exec. */
1444 },
1445
1446 {
1447 /* MULT SImode */
1448 {
1449 0, /* simple. */
1450 COSTS_N_INSNS (1), /* flag_setting. */
1451 COSTS_N_INSNS (1), /* extend. */
1452 COSTS_N_INSNS (1), /* add. */
1453 COSTS_N_INSNS (1), /* extend_add. */
1454 COSTS_N_INSNS (7) /* idiv. */
1455 },
1456 /* MULT DImode */
1457 {
1458 0, /* simple (N/A). */
1459 0, /* flag_setting (N/A). */
1460 COSTS_N_INSNS (1), /* extend. */
1461 0, /* add. */
1462 COSTS_N_INSNS (2), /* extend_add. */
1463 0 /* idiv (N/A). */
1464 }
1465 },
1466 /* LD/ST */
1467 {
1468 COSTS_N_INSNS (1), /* load. */
1469 COSTS_N_INSNS (1), /* load_sign_extend. */
1470 COSTS_N_INSNS (3), /* ldrd. */
1471 COSTS_N_INSNS (1), /* ldm_1st. */
1472 1, /* ldm_regs_per_insn_1st. */
1473 2, /* ldm_regs_per_insn_subsequent. */
1474 COSTS_N_INSNS (2), /* loadf. */
1475 COSTS_N_INSNS (2), /* loadd. */
1476 COSTS_N_INSNS (1), /* load_unaligned. */
1477 COSTS_N_INSNS (1), /* store. */
1478 COSTS_N_INSNS (3), /* strd. */
1479 COSTS_N_INSNS (1), /* stm_1st. */
1480 1, /* stm_regs_per_insn_1st. */
1481 2, /* stm_regs_per_insn_subsequent. */
1482 COSTS_N_INSNS (2), /* storef. */
1483 COSTS_N_INSNS (2), /* stored. */
1484 COSTS_N_INSNS (1), /* store_unaligned. */
1485 COSTS_N_INSNS (1), /* loadv. */
1486 COSTS_N_INSNS (1) /* storev. */
1487 },
1488 {
1489 /* FP SFmode */
1490 {
1491 COSTS_N_INSNS (15), /* div. */
1492 COSTS_N_INSNS (3), /* mult. */
1493 COSTS_N_INSNS (7), /* mult_addsub. */
1494 COSTS_N_INSNS (7), /* fma. */
1495 COSTS_N_INSNS (3), /* addsub. */
1496 COSTS_N_INSNS (3), /* fpconst. */
1497 COSTS_N_INSNS (3), /* neg. */
1498 COSTS_N_INSNS (3), /* compare. */
1499 COSTS_N_INSNS (3), /* widen. */
1500 COSTS_N_INSNS (3), /* narrow. */
1501 COSTS_N_INSNS (3), /* toint. */
1502 COSTS_N_INSNS (3), /* fromint. */
1503 COSTS_N_INSNS (3) /* roundint. */
1504 },
1505 /* FP DFmode */
1506 {
1507 COSTS_N_INSNS (30), /* div. */
1508 COSTS_N_INSNS (6), /* mult. */
1509 COSTS_N_INSNS (10), /* mult_addsub. */
1510 COSTS_N_INSNS (7), /* fma. */
1511 COSTS_N_INSNS (3), /* addsub. */
1512 COSTS_N_INSNS (3), /* fpconst. */
1513 COSTS_N_INSNS (3), /* neg. */
1514 COSTS_N_INSNS (3), /* compare. */
1515 COSTS_N_INSNS (3), /* widen. */
1516 COSTS_N_INSNS (3), /* narrow. */
1517 COSTS_N_INSNS (3), /* toint. */
1518 COSTS_N_INSNS (3), /* fromint. */
1519 COSTS_N_INSNS (3) /* roundint. */
1520 }
1521 },
1522 /* Vector */
1523 {
1524 COSTS_N_INSNS (1), /* alu. */
1525 COSTS_N_INSNS (4), /* mult. */
1526 COSTS_N_INSNS (1), /* movi. */
1527 COSTS_N_INSNS (2), /* dup. */
1528 COSTS_N_INSNS (2) /* extract. */
1529 }
1530 };
1531
1532 const struct cpu_cost_table cortexa12_extra_costs =
1533 {
1534 /* ALU */
1535 {
1536 0, /* arith. */
1537 0, /* logical. */
1538 0, /* shift. */
1539 COSTS_N_INSNS (1), /* shift_reg. */
1540 COSTS_N_INSNS (1), /* arith_shift. */
1541 COSTS_N_INSNS (1), /* arith_shift_reg. */
1542 COSTS_N_INSNS (1), /* log_shift. */
1543 COSTS_N_INSNS (1), /* log_shift_reg. */
1544 0, /* extend. */
1545 COSTS_N_INSNS (1), /* extend_arith. */
1546 0, /* bfi. */
1547 COSTS_N_INSNS (1), /* bfx. */
1548 COSTS_N_INSNS (1), /* clz. */
1549 COSTS_N_INSNS (1), /* rev. */
1550 0, /* non_exec. */
1551 true /* non_exec_costs_exec. */
1552 },
1553 /* MULT SImode */
1554 {
1555 {
1556 COSTS_N_INSNS (2), /* simple. */
1557 COSTS_N_INSNS (3), /* flag_setting. */
1558 COSTS_N_INSNS (2), /* extend. */
1559 COSTS_N_INSNS (3), /* add. */
1560 COSTS_N_INSNS (2), /* extend_add. */
1561 COSTS_N_INSNS (18) /* idiv. */
1562 },
1563 /* MULT DImode */
1564 {
1565 0, /* simple (N/A). */
1566 0, /* flag_setting (N/A). */
1567 COSTS_N_INSNS (3), /* extend. */
1568 0, /* add (N/A). */
1569 COSTS_N_INSNS (3), /* extend_add. */
1570 0 /* idiv (N/A). */
1571 }
1572 },
1573 /* LD/ST */
1574 {
1575 COSTS_N_INSNS (3), /* load. */
1576 COSTS_N_INSNS (3), /* load_sign_extend. */
1577 COSTS_N_INSNS (3), /* ldrd. */
1578 COSTS_N_INSNS (3), /* ldm_1st. */
1579 1, /* ldm_regs_per_insn_1st. */
1580 2, /* ldm_regs_per_insn_subsequent. */
1581 COSTS_N_INSNS (3), /* loadf. */
1582 COSTS_N_INSNS (3), /* loadd. */
1583 0, /* load_unaligned. */
1584 0, /* store. */
1585 0, /* strd. */
1586 0, /* stm_1st. */
1587 1, /* stm_regs_per_insn_1st. */
1588 2, /* stm_regs_per_insn_subsequent. */
1589 COSTS_N_INSNS (2), /* storef. */
1590 COSTS_N_INSNS (2), /* stored. */
1591 0, /* store_unaligned. */
1592 COSTS_N_INSNS (1), /* loadv. */
1593 COSTS_N_INSNS (1) /* storev. */
1594 },
1595 {
1596 /* FP SFmode */
1597 {
1598 COSTS_N_INSNS (17), /* div. */
1599 COSTS_N_INSNS (4), /* mult. */
1600 COSTS_N_INSNS (8), /* mult_addsub. */
1601 COSTS_N_INSNS (8), /* fma. */
1602 COSTS_N_INSNS (4), /* addsub. */
1603 COSTS_N_INSNS (2), /* fpconst. */
1604 COSTS_N_INSNS (2), /* neg. */
1605 COSTS_N_INSNS (2), /* compare. */
1606 COSTS_N_INSNS (4), /* widen. */
1607 COSTS_N_INSNS (4), /* narrow. */
1608 COSTS_N_INSNS (4), /* toint. */
1609 COSTS_N_INSNS (4), /* fromint. */
1610 COSTS_N_INSNS (4) /* roundint. */
1611 },
1612 /* FP DFmode */
1613 {
1614 COSTS_N_INSNS (31), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (2), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1627 }
1628 },
1629 /* Vector */
1630 {
1631 COSTS_N_INSNS (1), /* alu. */
1632 COSTS_N_INSNS (4), /* mult. */
1633 COSTS_N_INSNS (1), /* movi. */
1634 COSTS_N_INSNS (2), /* dup. */
1635 COSTS_N_INSNS (2) /* extract. */
1636 }
1637 };
1638
1639 const struct cpu_cost_table cortexa15_extra_costs =
1640 {
1641 /* ALU */
1642 {
1643 0, /* arith. */
1644 0, /* logical. */
1645 0, /* shift. */
1646 0, /* shift_reg. */
1647 COSTS_N_INSNS (1), /* arith_shift. */
1648 COSTS_N_INSNS (1), /* arith_shift_reg. */
1649 COSTS_N_INSNS (1), /* log_shift. */
1650 COSTS_N_INSNS (1), /* log_shift_reg. */
1651 0, /* extend. */
1652 COSTS_N_INSNS (1), /* extend_arith. */
1653 COSTS_N_INSNS (1), /* bfi. */
1654 0, /* bfx. */
1655 0, /* clz. */
1656 0, /* rev. */
1657 0, /* non_exec. */
1658 true /* non_exec_costs_exec. */
1659 },
1660 /* MULT SImode */
1661 {
1662 {
1663 COSTS_N_INSNS (2), /* simple. */
1664 COSTS_N_INSNS (3), /* flag_setting. */
1665 COSTS_N_INSNS (2), /* extend. */
1666 COSTS_N_INSNS (2), /* add. */
1667 COSTS_N_INSNS (2), /* extend_add. */
1668 COSTS_N_INSNS (18) /* idiv. */
1669 },
1670 /* MULT DImode */
1671 {
1672 0, /* simple (N/A). */
1673 0, /* flag_setting (N/A). */
1674 COSTS_N_INSNS (3), /* extend. */
1675 0, /* add (N/A). */
1676 COSTS_N_INSNS (3), /* extend_add. */
1677 0 /* idiv (N/A). */
1678 }
1679 },
1680 /* LD/ST */
1681 {
1682 COSTS_N_INSNS (3), /* load. */
1683 COSTS_N_INSNS (3), /* load_sign_extend. */
1684 COSTS_N_INSNS (3), /* ldrd. */
1685 COSTS_N_INSNS (4), /* ldm_1st. */
1686 1, /* ldm_regs_per_insn_1st. */
1687 2, /* ldm_regs_per_insn_subsequent. */
1688 COSTS_N_INSNS (4), /* loadf. */
1689 COSTS_N_INSNS (4), /* loadd. */
1690 0, /* load_unaligned. */
1691 0, /* store. */
1692 0, /* strd. */
1693 COSTS_N_INSNS (1), /* stm_1st. */
1694 1, /* stm_regs_per_insn_1st. */
1695 2, /* stm_regs_per_insn_subsequent. */
1696 0, /* storef. */
1697 0, /* stored. */
1698 0, /* store_unaligned. */
1699 COSTS_N_INSNS (1), /* loadv. */
1700 COSTS_N_INSNS (1) /* storev. */
1701 },
1702 {
1703 /* FP SFmode */
1704 {
1705 COSTS_N_INSNS (17), /* div. */
1706 COSTS_N_INSNS (4), /* mult. */
1707 COSTS_N_INSNS (8), /* mult_addsub. */
1708 COSTS_N_INSNS (8), /* fma. */
1709 COSTS_N_INSNS (4), /* addsub. */
1710 COSTS_N_INSNS (2), /* fpconst. */
1711 COSTS_N_INSNS (2), /* neg. */
1712 COSTS_N_INSNS (5), /* compare. */
1713 COSTS_N_INSNS (4), /* widen. */
1714 COSTS_N_INSNS (4), /* narrow. */
1715 COSTS_N_INSNS (4), /* toint. */
1716 COSTS_N_INSNS (4), /* fromint. */
1717 COSTS_N_INSNS (4) /* roundint. */
1718 },
1719 /* FP DFmode */
1720 {
1721 COSTS_N_INSNS (31), /* div. */
1722 COSTS_N_INSNS (4), /* mult. */
1723 COSTS_N_INSNS (8), /* mult_addsub. */
1724 COSTS_N_INSNS (8), /* fma. */
1725 COSTS_N_INSNS (4), /* addsub. */
1726 COSTS_N_INSNS (2), /* fpconst. */
1727 COSTS_N_INSNS (2), /* neg. */
1728 COSTS_N_INSNS (2), /* compare. */
1729 COSTS_N_INSNS (4), /* widen. */
1730 COSTS_N_INSNS (4), /* narrow. */
1731 COSTS_N_INSNS (4), /* toint. */
1732 COSTS_N_INSNS (4), /* fromint. */
1733 COSTS_N_INSNS (4) /* roundint. */
1734 }
1735 },
1736 /* Vector */
1737 {
1738 COSTS_N_INSNS (1), /* alu. */
1739 COSTS_N_INSNS (4), /* mult. */
1740 COSTS_N_INSNS (1), /* movi. */
1741 COSTS_N_INSNS (2), /* dup. */
1742 COSTS_N_INSNS (2) /* extract. */
1743 }
1744 };
1745
1746 const struct cpu_cost_table v7m_extra_costs =
1747 {
1748 /* ALU */
1749 {
1750 0, /* arith. */
1751 0, /* logical. */
1752 0, /* shift. */
1753 0, /* shift_reg. */
1754 0, /* arith_shift. */
1755 COSTS_N_INSNS (1), /* arith_shift_reg. */
1756 0, /* log_shift. */
1757 COSTS_N_INSNS (1), /* log_shift_reg. */
1758 0, /* extend. */
1759 COSTS_N_INSNS (1), /* extend_arith. */
1760 0, /* bfi. */
1761 0, /* bfx. */
1762 0, /* clz. */
1763 0, /* rev. */
1764 COSTS_N_INSNS (1), /* non_exec. */
1765 false /* non_exec_costs_exec. */
1766 },
1767 {
1768 /* MULT SImode */
1769 {
1770 COSTS_N_INSNS (1), /* simple. */
1771 COSTS_N_INSNS (1), /* flag_setting. */
1772 COSTS_N_INSNS (2), /* extend. */
1773 COSTS_N_INSNS (1), /* add. */
1774 COSTS_N_INSNS (3), /* extend_add. */
1775 COSTS_N_INSNS (8) /* idiv. */
1776 },
1777 /* MULT DImode */
1778 {
1779 0, /* simple (N/A). */
1780 0, /* flag_setting (N/A). */
1781 COSTS_N_INSNS (2), /* extend. */
1782 0, /* add (N/A). */
1783 COSTS_N_INSNS (3), /* extend_add. */
1784 0 /* idiv (N/A). */
1785 }
1786 },
1787 /* LD/ST */
1788 {
1789 COSTS_N_INSNS (2), /* load. */
1790 0, /* load_sign_extend. */
1791 COSTS_N_INSNS (3), /* ldrd. */
1792 COSTS_N_INSNS (2), /* ldm_1st. */
1793 1, /* ldm_regs_per_insn_1st. */
1794 1, /* ldm_regs_per_insn_subsequent. */
1795 COSTS_N_INSNS (2), /* loadf. */
1796 COSTS_N_INSNS (3), /* loadd. */
1797 COSTS_N_INSNS (1), /* load_unaligned. */
1798 COSTS_N_INSNS (2), /* store. */
1799 COSTS_N_INSNS (3), /* strd. */
1800 COSTS_N_INSNS (2), /* stm_1st. */
1801 1, /* stm_regs_per_insn_1st. */
1802 1, /* stm_regs_per_insn_subsequent. */
1803 COSTS_N_INSNS (2), /* storef. */
1804 COSTS_N_INSNS (3), /* stored. */
1805 COSTS_N_INSNS (1), /* store_unaligned. */
1806 COSTS_N_INSNS (1), /* loadv. */
1807 COSTS_N_INSNS (1) /* storev. */
1808 },
1809 {
1810 /* FP SFmode */
1811 {
1812 COSTS_N_INSNS (7), /* div. */
1813 COSTS_N_INSNS (2), /* mult. */
1814 COSTS_N_INSNS (5), /* mult_addsub. */
1815 COSTS_N_INSNS (3), /* fma. */
1816 COSTS_N_INSNS (1), /* addsub. */
1817 0, /* fpconst. */
1818 0, /* neg. */
1819 0, /* compare. */
1820 0, /* widen. */
1821 0, /* narrow. */
1822 0, /* toint. */
1823 0, /* fromint. */
1824 0 /* roundint. */
1825 },
1826 /* FP DFmode */
1827 {
1828 COSTS_N_INSNS (15), /* div. */
1829 COSTS_N_INSNS (5), /* mult. */
1830 COSTS_N_INSNS (7), /* mult_addsub. */
1831 COSTS_N_INSNS (7), /* fma. */
1832 COSTS_N_INSNS (3), /* addsub. */
1833 0, /* fpconst. */
1834 0, /* neg. */
1835 0, /* compare. */
1836 0, /* widen. */
1837 0, /* narrow. */
1838 0, /* toint. */
1839 0, /* fromint. */
1840 0 /* roundint. */
1841 }
1842 },
1843 /* Vector */
1844 {
1845 COSTS_N_INSNS (1), /* alu. */
1846 COSTS_N_INSNS (4), /* mult. */
1847 COSTS_N_INSNS (1), /* movi. */
1848 COSTS_N_INSNS (2), /* dup. */
1849 COSTS_N_INSNS (2) /* extract. */
1850 }
1851 };
1852
1853 const struct addr_mode_cost_table generic_addr_mode_costs =
1854 {
1855 /* int. */
1856 {
1857 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1858 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1859 COSTS_N_INSNS (0) /* AMO_WB. */
1860 },
1861 /* float. */
1862 {
1863 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1864 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1865 COSTS_N_INSNS (0) /* AMO_WB. */
1866 },
1867 /* vector. */
1868 {
1869 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1870 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1871 COSTS_N_INSNS (0) /* AMO_WB. */
1872 }
1873 };
1874
1875 const struct tune_params arm_slowmul_tune =
1876 {
1877 &generic_extra_costs, /* Insn extra costs. */
1878 &generic_addr_mode_costs, /* Addressing mode costs. */
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 3, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 1, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_TRUE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_STRINGOPS_FALSE,
1893 tune_params::FUSE_NOTHING,
1894 tune_params::SCHED_AUTOPREF_OFF
1895 };
1896
1897 const struct tune_params arm_fastmul_tune =
1898 {
1899 &generic_extra_costs, /* Insn extra costs. */
1900 &generic_addr_mode_costs, /* Addressing mode costs. */
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_TRUE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_STRINGOPS_FALSE,
1915 tune_params::FUSE_NOTHING,
1916 tune_params::SCHED_AUTOPREF_OFF
1917 };
1918
1919 /* StrongARM has early execution of branches, so a sequence that is worth
1920 skipping is shorter. Set max_insns_skipped to a lower value. */
1921
1922 const struct tune_params arm_strongarm_tune =
1923 {
1924 &generic_extra_costs, /* Insn extra costs. */
1925 &generic_addr_mode_costs, /* Addressing mode costs. */
1926 NULL, /* Sched adj cost. */
1927 arm_default_branch_cost,
1928 &arm_default_vec_cost,
1929 1, /* Constant limit. */
1930 3, /* Max cond insns. */
1931 8, /* Memset max inline. */
1932 1, /* Issue rate. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 tune_params::PREF_CONST_POOL_TRUE,
1935 tune_params::PREF_LDRD_FALSE,
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1938 tune_params::DISPARAGE_FLAGS_NEITHER,
1939 tune_params::PREF_NEON_STRINGOPS_FALSE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_OFF
1942 };
1943
1944 const struct tune_params arm_xscale_tune =
1945 {
1946 &generic_extra_costs, /* Insn extra costs. */
1947 &generic_addr_mode_costs, /* Addressing mode costs. */
1948 xscale_sched_adjust_cost,
1949 arm_default_branch_cost,
1950 &arm_default_vec_cost,
1951 2, /* Constant limit. */
1952 3, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 1, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 tune_params::PREF_CONST_POOL_TRUE,
1957 tune_params::PREF_LDRD_FALSE,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER,
1961 tune_params::PREF_NEON_STRINGOPS_FALSE,
1962 tune_params::FUSE_NOTHING,
1963 tune_params::SCHED_AUTOPREF_OFF
1964 };
1965
1966 const struct tune_params arm_9e_tune =
1967 {
1968 &generic_extra_costs, /* Insn extra costs. */
1969 &generic_addr_mode_costs, /* Addressing mode costs. */
1970 NULL, /* Sched adj cost. */
1971 arm_default_branch_cost,
1972 &arm_default_vec_cost,
1973 1, /* Constant limit. */
1974 5, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 1, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 tune_params::PREF_CONST_POOL_TRUE,
1979 tune_params::PREF_LDRD_FALSE,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_NEITHER,
1983 tune_params::PREF_NEON_STRINGOPS_FALSE,
1984 tune_params::FUSE_NOTHING,
1985 tune_params::SCHED_AUTOPREF_OFF
1986 };
1987
1988 const struct tune_params arm_marvell_pj4_tune =
1989 {
1990 &generic_extra_costs, /* Insn extra costs. */
1991 &generic_addr_mode_costs, /* Addressing mode costs. */
1992 NULL, /* Sched adj cost. */
1993 arm_default_branch_cost,
1994 &arm_default_vec_cost,
1995 1, /* Constant limit. */
1996 5, /* Max cond insns. */
1997 8, /* Memset max inline. */
1998 2, /* Issue rate. */
1999 ARM_PREFETCH_NOT_BENEFICIAL,
2000 tune_params::PREF_CONST_POOL_TRUE,
2001 tune_params::PREF_LDRD_FALSE,
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2004 tune_params::DISPARAGE_FLAGS_NEITHER,
2005 tune_params::PREF_NEON_STRINGOPS_FALSE,
2006 tune_params::FUSE_NOTHING,
2007 tune_params::SCHED_AUTOPREF_OFF
2008 };
2009
2010 const struct tune_params arm_v6t2_tune =
2011 {
2012 &generic_extra_costs, /* Insn extra costs. */
2013 &generic_addr_mode_costs, /* Addressing mode costs. */
2014 NULL, /* Sched adj cost. */
2015 arm_default_branch_cost,
2016 &arm_default_vec_cost,
2017 1, /* Constant limit. */
2018 5, /* Max cond insns. */
2019 8, /* Memset max inline. */
2020 1, /* Issue rate. */
2021 ARM_PREFETCH_NOT_BENEFICIAL,
2022 tune_params::PREF_CONST_POOL_FALSE,
2023 tune_params::PREF_LDRD_FALSE,
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2025 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2026 tune_params::DISPARAGE_FLAGS_NEITHER,
2027 tune_params::PREF_NEON_STRINGOPS_FALSE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032
2033 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2034 const struct tune_params arm_cortex_tune =
2035 {
2036 &generic_extra_costs,
2037 &generic_addr_mode_costs, /* Addressing mode costs. */
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_STRINGOPS_FALSE,
2052 tune_params::FUSE_NOTHING,
2053 tune_params::SCHED_AUTOPREF_OFF
2054 };
2055
2056 const struct tune_params arm_cortex_a8_tune =
2057 {
2058 &cortexa8_extra_costs,
2059 &generic_addr_mode_costs, /* Addressing mode costs. */
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 5, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 2, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_FALSE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_NEITHER,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 tune_params::FUSE_NOTHING,
2075 tune_params::SCHED_AUTOPREF_OFF
2076 };
2077
2078 const struct tune_params arm_cortex_a7_tune =
2079 {
2080 &cortexa7_extra_costs,
2081 &generic_addr_mode_costs, /* Addressing mode costs. */
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_FALSE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER,
2095 tune_params::PREF_NEON_STRINGOPS_TRUE,
2096 tune_params::FUSE_NOTHING,
2097 tune_params::SCHED_AUTOPREF_OFF
2098 };
2099
2100 const struct tune_params arm_cortex_a15_tune =
2101 {
2102 &cortexa15_extra_costs,
2103 &generic_addr_mode_costs, /* Addressing mode costs. */
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 8, /* Memset max inline. */
2110 3, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_STRINGOPS_TRUE,
2118 tune_params::FUSE_NOTHING,
2119 tune_params::SCHED_AUTOPREF_FULL
2120 };
2121
2122 const struct tune_params arm_cortex_a35_tune =
2123 {
2124 &cortexa53_extra_costs,
2125 &generic_addr_mode_costs, /* Addressing mode costs. */
2126 NULL, /* Sched adj cost. */
2127 arm_default_branch_cost,
2128 &arm_default_vec_cost,
2129 1, /* Constant limit. */
2130 5, /* Max cond insns. */
2131 8, /* Memset max inline. */
2132 1, /* Issue rate. */
2133 ARM_PREFETCH_NOT_BENEFICIAL,
2134 tune_params::PREF_CONST_POOL_FALSE,
2135 tune_params::PREF_LDRD_FALSE,
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2138 tune_params::DISPARAGE_FLAGS_NEITHER,
2139 tune_params::PREF_NEON_STRINGOPS_TRUE,
2140 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2141 tune_params::SCHED_AUTOPREF_OFF
2142 };
2143
2144 const struct tune_params arm_cortex_a53_tune =
2145 {
2146 &cortexa53_extra_costs,
2147 &generic_addr_mode_costs, /* Addressing mode costs. */
2148 NULL, /* Sched adj cost. */
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost,
2151 1, /* Constant limit. */
2152 5, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_NOT_BENEFICIAL,
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_FALSE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_NEITHER,
2161 tune_params::PREF_NEON_STRINGOPS_TRUE,
2162 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2163 tune_params::SCHED_AUTOPREF_OFF
2164 };
2165
2166 const struct tune_params arm_cortex_a57_tune =
2167 {
2168 &cortexa57_extra_costs,
2169 &generic_addr_mode_costs, /* Addressing mode costs. */
2170 NULL, /* Sched adj cost. */
2171 arm_default_branch_cost,
2172 &arm_default_vec_cost,
2173 1, /* Constant limit. */
2174 2, /* Max cond insns. */
2175 8, /* Memset max inline. */
2176 3, /* Issue rate. */
2177 ARM_PREFETCH_NOT_BENEFICIAL,
2178 tune_params::PREF_CONST_POOL_FALSE,
2179 tune_params::PREF_LDRD_TRUE,
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2182 tune_params::DISPARAGE_FLAGS_ALL,
2183 tune_params::PREF_NEON_STRINGOPS_TRUE,
2184 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2185 tune_params::SCHED_AUTOPREF_FULL
2186 };
2187
2188 const struct tune_params arm_exynosm1_tune =
2189 {
2190 &exynosm1_extra_costs,
2191 &generic_addr_mode_costs, /* Addressing mode costs. */
2192 NULL, /* Sched adj cost. */
2193 arm_default_branch_cost,
2194 &arm_default_vec_cost,
2195 1, /* Constant limit. */
2196 2, /* Max cond insns. */
2197 8, /* Memset max inline. */
2198 3, /* Issue rate. */
2199 ARM_PREFETCH_NOT_BENEFICIAL,
2200 tune_params::PREF_CONST_POOL_FALSE,
2201 tune_params::PREF_LDRD_TRUE,
2202 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2204 tune_params::DISPARAGE_FLAGS_ALL,
2205 tune_params::PREF_NEON_STRINGOPS_TRUE,
2206 tune_params::FUSE_NOTHING,
2207 tune_params::SCHED_AUTOPREF_OFF
2208 };
2209
2210 const struct tune_params arm_xgene1_tune =
2211 {
2212 &xgene1_extra_costs,
2213 &generic_addr_mode_costs, /* Addressing mode costs. */
2214 NULL, /* Sched adj cost. */
2215 arm_default_branch_cost,
2216 &arm_default_vec_cost,
2217 1, /* Constant limit. */
2218 2, /* Max cond insns. */
2219 32, /* Memset max inline. */
2220 4, /* Issue rate. */
2221 ARM_PREFETCH_NOT_BENEFICIAL,
2222 tune_params::PREF_CONST_POOL_FALSE,
2223 tune_params::PREF_LDRD_TRUE,
2224 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2226 tune_params::DISPARAGE_FLAGS_ALL,
2227 tune_params::PREF_NEON_STRINGOPS_FALSE,
2228 tune_params::FUSE_NOTHING,
2229 tune_params::SCHED_AUTOPREF_OFF
2230 };
2231
2232 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2233 less appealing. Set max_insns_skipped to a low value. */
2234
2235 const struct tune_params arm_cortex_a5_tune =
2236 {
2237 &cortexa5_extra_costs,
2238 &generic_addr_mode_costs, /* Addressing mode costs. */
2239 NULL, /* Sched adj cost. */
2240 arm_cortex_a5_branch_cost,
2241 &arm_default_vec_cost,
2242 1, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL,
2247 tune_params::PREF_CONST_POOL_FALSE,
2248 tune_params::PREF_LDRD_FALSE,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER,
2252 tune_params::PREF_NEON_STRINGOPS_TRUE,
2253 tune_params::FUSE_NOTHING,
2254 tune_params::SCHED_AUTOPREF_OFF
2255 };
2256
2257 const struct tune_params arm_cortex_a9_tune =
2258 {
2259 &cortexa9_extra_costs,
2260 &generic_addr_mode_costs, /* Addressing mode costs. */
2261 cortex_a9_sched_adjust_cost,
2262 arm_default_branch_cost,
2263 &arm_default_vec_cost,
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 2, /* Issue rate. */
2268 ARM_PREFETCH_BENEFICIAL(4,32,32),
2269 tune_params::PREF_CONST_POOL_FALSE,
2270 tune_params::PREF_LDRD_FALSE,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER,
2274 tune_params::PREF_NEON_STRINGOPS_FALSE,
2275 tune_params::FUSE_NOTHING,
2276 tune_params::SCHED_AUTOPREF_OFF
2277 };
2278
2279 const struct tune_params arm_cortex_a12_tune =
2280 {
2281 &cortexa12_extra_costs,
2282 &generic_addr_mode_costs, /* Addressing mode costs. */
2283 NULL, /* Sched adj cost. */
2284 arm_default_branch_cost,
2285 &arm_default_vec_cost, /* Vectorizer costs. */
2286 1, /* Constant limit. */
2287 2, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL,
2291 tune_params::PREF_CONST_POOL_FALSE,
2292 tune_params::PREF_LDRD_TRUE,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_ALL,
2296 tune_params::PREF_NEON_STRINGOPS_TRUE,
2297 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2298 tune_params::SCHED_AUTOPREF_OFF
2299 };
2300
2301 const struct tune_params arm_cortex_a73_tune =
2302 {
2303 &cortexa57_extra_costs,
2304 &generic_addr_mode_costs, /* Addressing mode costs. */
2305 NULL, /* Sched adj cost. */
2306 arm_default_branch_cost,
2307 &arm_default_vec_cost, /* Vectorizer costs. */
2308 1, /* Constant limit. */
2309 2, /* Max cond insns. */
2310 8, /* Memset max inline. */
2311 2, /* Issue rate. */
2312 ARM_PREFETCH_NOT_BENEFICIAL,
2313 tune_params::PREF_CONST_POOL_FALSE,
2314 tune_params::PREF_LDRD_TRUE,
2315 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2316 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2317 tune_params::DISPARAGE_FLAGS_ALL,
2318 tune_params::PREF_NEON_STRINGOPS_TRUE,
2319 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2320 tune_params::SCHED_AUTOPREF_FULL
2321 };
2322
2323 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2324 cycle to execute each. An LDR from the constant pool also takes two cycles
2325 to execute, but mildly increases pipelining opportunity (consecutive
2326 loads/stores can be pipelined together, saving one cycle), and may also
2327 improve icache utilisation. Hence we prefer the constant pool for such
2328 processors. */
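/* Put concretely: materialising one 32-bit constant costs two cycles either
   way (a MOVW/MOVT pair, or a single two-cycle LDR), but two back-to-back
   literal-pool loads can pipeline and finish in three cycles, whereas two
   MOVW/MOVT pairs always take four.  */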
2329
2330 const struct tune_params arm_v7m_tune =
2331 {
2332 &v7m_extra_costs,
2333 &generic_addr_mode_costs, /* Addressing mode costs. */
2334 NULL, /* Sched adj cost. */
2335 arm_cortex_m_branch_cost,
2336 &arm_default_vec_cost,
2337 1, /* Constant limit. */
2338 2, /* Max cond insns. */
2339 8, /* Memset max inline. */
2340 1, /* Issue rate. */
2341 ARM_PREFETCH_NOT_BENEFICIAL,
2342 tune_params::PREF_CONST_POOL_TRUE,
2343 tune_params::PREF_LDRD_FALSE,
2344 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2345 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2346 tune_params::DISPARAGE_FLAGS_NEITHER,
2347 tune_params::PREF_NEON_STRINGOPS_FALSE,
2348 tune_params::FUSE_NOTHING,
2349 tune_params::SCHED_AUTOPREF_OFF
2350 };
2351
2352 /* Cortex-M7 tuning. */
2353
2354 const struct tune_params arm_cortex_m7_tune =
2355 {
2356 &v7m_extra_costs,
2357 &generic_addr_mode_costs, /* Addressing mode costs. */
2358 NULL, /* Sched adj cost. */
2359 arm_cortex_m7_branch_cost,
2360 &arm_default_vec_cost,
2361 0, /* Constant limit. */
2362 1, /* Max cond insns. */
2363 8, /* Memset max inline. */
2364 2, /* Issue rate. */
2365 ARM_PREFETCH_NOT_BENEFICIAL,
2366 tune_params::PREF_CONST_POOL_TRUE,
2367 tune_params::PREF_LDRD_FALSE,
2368 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2369 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2370 tune_params::DISPARAGE_FLAGS_NEITHER,
2371 tune_params::PREF_NEON_STRINGOPS_FALSE,
2372 tune_params::FUSE_NOTHING,
2373 tune_params::SCHED_AUTOPREF_OFF
2374 };
2375
2376 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2377 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2378 cortex-m23. */
2379 const struct tune_params arm_v6m_tune =
2380 {
2381 &generic_extra_costs, /* Insn extra costs. */
2382 &generic_addr_mode_costs, /* Addressing mode costs. */
2383 NULL, /* Sched adj cost. */
2384 arm_default_branch_cost,
2385 &arm_default_vec_cost, /* Vectorizer costs. */
2386 1, /* Constant limit. */
2387 5, /* Max cond insns. */
2388 8, /* Memset max inline. */
2389 1, /* Issue rate. */
2390 ARM_PREFETCH_NOT_BENEFICIAL,
2391 tune_params::PREF_CONST_POOL_FALSE,
2392 tune_params::PREF_LDRD_FALSE,
2393 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2394 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2395 tune_params::DISPARAGE_FLAGS_NEITHER,
2396 tune_params::PREF_NEON_STRINGOPS_FALSE,
2397 tune_params::FUSE_NOTHING,
2398 tune_params::SCHED_AUTOPREF_OFF
2399 };
2400
2401 const struct tune_params arm_fa726te_tune =
2402 {
2403 &generic_extra_costs, /* Insn extra costs. */
2404 &generic_addr_mode_costs, /* Addressing mode costs. */
2405 fa726te_sched_adjust_cost,
2406 arm_default_branch_cost,
2407 &arm_default_vec_cost,
2408 1, /* Constant limit. */
2409 5, /* Max cond insns. */
2410 8, /* Memset max inline. */
2411 2, /* Issue rate. */
2412 ARM_PREFETCH_NOT_BENEFICIAL,
2413 tune_params::PREF_CONST_POOL_TRUE,
2414 tune_params::PREF_LDRD_FALSE,
2415 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2416 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2417 tune_params::DISPARAGE_FLAGS_NEITHER,
2418 tune_params::PREF_NEON_STRINGOPS_FALSE,
2419 tune_params::FUSE_NOTHING,
2420 tune_params::SCHED_AUTOPREF_OFF
2421 };
2422
2423 /* Key type for Pointer Authentication extension. */
2424 enum aarch_key_type aarch_ra_sign_key = AARCH_KEY_A;
2425
2426 char *accepted_branch_protection_string = NULL;
2427
2428 /* Auto-generated CPU, FPU and architecture tables. */
2429 #include "arm-cpu-data.h"
2430
2431 /* The name of the preprocessor macro to define for this architecture. PROFILE
2432 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2433 is thus chosen to be big enough to hold the longest architecture name. */
2434
2435 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2436
2437 /* Supported TLS relocations. */
2438
2439 enum tls_reloc {
2440 TLS_GD32,
2441 TLS_GD32_FDPIC,
2442 TLS_LDM32,
2443 TLS_LDM32_FDPIC,
2444 TLS_LDO32,
2445 TLS_IE32,
2446 TLS_IE32_FDPIC,
2447 TLS_LE32,
2448 TLS_DESCSEQ /* GNU scheme */
2449 };
2450
2451 /* The maximum number of insns to be used when loading a constant. */
2452 inline static int
2453 arm_constant_limit (bool size_p)
2454 {
2455 return size_p ? 1 : current_tune->constant_limit;
2456 }
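/* That is, when optimising for size at most one instruction is spent per
   constant; otherwise the per-CPU tuning decides (for instance
   arm_slowmul_tune above allows up to 3 instructions).  */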
2457
2458 /* Emit an insn that's a simple single-set. Both the operands must be known
2459 to be valid. */
2460 inline static rtx_insn *
2461 emit_set_insn (rtx x, rtx y)
2462 {
2463 return emit_insn (gen_rtx_SET (x, y));
2464 }
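/* Example (dest_reg and src_reg are hypothetical pseudo registers):

     rtx_insn *insn = emit_set_insn (dest_reg, src_reg);

   emits the single pattern (set dest_reg src_reg); both operands are
   assumed to be already valid, no legitimisation is attempted.  */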
2465
2466 /* Return the number of bits set in VALUE. */
2467 static unsigned
2468 bit_count (unsigned long value)
2469 {
2470 unsigned long count = 0;
2471
2472 while (value)
2473 {
2474 count++;
2475 value &= value - 1; /* Clear the least-significant set bit. */
2476 }
2477
2478 return count;
2479 }
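/* Worked example: for VALUE == 0x58 (binary 1011000) the loop clears one
   set bit per iteration, 0x58 -> 0x50 -> 0x40 -> 0, and returns 3.  */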
2480
2481 /* Return the number of bits set in BMAP. */
2482 static unsigned
2483 bitmap_popcount (const sbitmap bmap)
2484 {
2485 unsigned int count = 0;
2486 unsigned int n = 0;
2487 sbitmap_iterator sbi;
2488
2489 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2490 count++;
2491 return count;
2492 }
2493
2494 typedef struct
2495 {
2496 machine_mode mode;
2497 const char *name;
2498 } arm_fixed_mode_set;
2499
2500 /* A small helper for setting fixed-point libfuncs. */
2501
2502 static void
2503 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2504 const char *funcname, const char *modename,
2505 int num_suffix)
2506 {
2507 char buffer[50];
2508
2509 if (num_suffix == 0)
2510 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2511 else
2512 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2513
2514 set_optab_libfunc (optable, mode, buffer);
2515 }
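/* For example, following the sprintf above,

     arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);

   registers the libcall name "__gnu_addqq3" for QQmode addition, while a
   NUM_SUFFIX of 0 drops the trailing digit altogether.  */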
2516
2517 static void
2518 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2519 machine_mode from, const char *funcname,
2520 const char *toname, const char *fromname)
2521 {
2522 char buffer[50];
2523 const char *maybe_suffix_2 = "";
2524
2525 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2526 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2527 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2528 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2529 maybe_suffix_2 = "2";
2530
2531 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2532 maybe_suffix_2);
2533
2534 set_conv_libfunc (optable, to, from, buffer);
2535 }
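/* For example, a conversion from QQmode to SImode with funcname "fract"
   produces "__gnu_fractqqsi" (no "2" suffix, since SImode is not a
   fixed-point mode), whereas a conversion between two signed fract modes
   such as QQmode and HQmode produces "__gnu_fractqqhq2".  */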
2536
2537 static GTY(()) rtx speculation_barrier_libfunc;
2538
2539 /* Record that we have no arithmetic or comparison libfuncs for
2540 machine mode MODE. */
2541
2542 static void
2543 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2544 {
2545 /* Arithmetic. */
2546 set_optab_libfunc (add_optab, mode, NULL);
2547 set_optab_libfunc (sdiv_optab, mode, NULL);
2548 set_optab_libfunc (smul_optab, mode, NULL);
2549 set_optab_libfunc (neg_optab, mode, NULL);
2550 set_optab_libfunc (sub_optab, mode, NULL);
2551
2552 /* Comparisons. */
2553 set_optab_libfunc (eq_optab, mode, NULL);
2554 set_optab_libfunc (ne_optab, mode, NULL);
2555 set_optab_libfunc (lt_optab, mode, NULL);
2556 set_optab_libfunc (le_optab, mode, NULL);
2557 set_optab_libfunc (ge_optab, mode, NULL);
2558 set_optab_libfunc (gt_optab, mode, NULL);
2559 set_optab_libfunc (unord_optab, mode, NULL);
2560 }
2561
2562 /* Set up library functions unique to ARM. */
2563 static void
2564 arm_init_libfuncs (void)
2565 {
2566 machine_mode mode_iter;
2567
2568 /* For Linux, we have access to kernel support for atomic operations. */
2569 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2570 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2571
2572 /* There are no special library functions unless we are using the
2573 ARM BPABI. */
2574 if (!TARGET_BPABI)
2575 return;
2576
2577 /* The functions below are described in Section 4 of the "Run-Time
2578 ABI for the ARM architecture", Version 1.0. */
2579
2580 /* Double-precision floating-point arithmetic. Table 2. */
2581 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2582 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2583 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2584 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2585 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2586
2587 /* Double-precision comparisons. Table 3. */
2588 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2589 set_optab_libfunc (ne_optab, DFmode, NULL);
2590 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2591 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2592 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2593 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2594 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2595
2596 /* Single-precision floating-point arithmetic. Table 4. */
2597 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2598 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2599 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2600 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2601 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2602
2603 /* Single-precision comparisons. Table 5. */
2604 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2605 set_optab_libfunc (ne_optab, SFmode, NULL);
2606 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2607 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2608 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2609 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2610 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2611
2612 /* Floating-point to integer conversions. Table 6. */
2613 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2614 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2615 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2616 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2617 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2618 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2619 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2620 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2621
2622 /* Conversions between floating types. Table 7. */
2623 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2624 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2625
2626 /* Integer to floating-point conversions. Table 8. */
2627 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2628 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2629 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2630 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2631 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2632 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2633 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2634 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2635
2636 /* Long long. Table 9. */
2637 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2638 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2639 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2640 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2641 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2642 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2643 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2644 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2645
2646 /* Integer (32/32->32) division. \S 4.3.1. */
2647 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2648 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2649
2650 /* The divmod functions are designed so that they can be used for
2651 plain division, even though they return both the quotient and the
2652 remainder. The quotient is returned in the usual location (i.e.,
2653 r0 for SImode, {r0, r1} for DImode), just as would be expected
2654 for an ordinary division routine. Because the AAPCS calling
2655 conventions specify that all of { r0, r1, r2, r3 } are
2656 call-clobbered registers, there is no need to tell the compiler
2657 explicitly that those registers are clobbered by these
2658 routines. */
2659 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2660 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2661
2662 /* For SImode division the ABI provides div-without-mod routines,
2663 which are faster. */
2664 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2665 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2666
2667 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2668 divmod libcalls instead. */
2669 set_optab_libfunc (smod_optab, DImode, NULL);
2670 set_optab_libfunc (umod_optab, DImode, NULL);
2671 set_optab_libfunc (smod_optab, SImode, NULL);
2672 set_optab_libfunc (umod_optab, SImode, NULL);
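/* With the entries above, a SImode expression such as "a % b" goes through
   the divmod libcall: the compiler calls __aeabi_idivmod, which (per the
   run-time ABI) returns the quotient in r0 and the remainder in r1, and
   then simply uses the r1 half of the result.  */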
2673
2674 /* Half-precision float operations. The compiler handles all operations
2675 with NULL libfuncs by converting to SFmode. */
2676 switch (arm_fp16_format)
2677 {
2678 case ARM_FP16_FORMAT_IEEE:
2679 case ARM_FP16_FORMAT_ALTERNATIVE:
2680
2681 /* Conversions. */
2682 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2683 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2684 ? "__gnu_f2h_ieee"
2685 : "__gnu_f2h_alternative"));
2686 set_conv_libfunc (sext_optab, SFmode, HFmode,
2687 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2688 ? "__gnu_h2f_ieee"
2689 : "__gnu_h2f_alternative"));
2690
2691 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2692 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2693 ? "__gnu_d2h_ieee"
2694 : "__gnu_d2h_alternative"));
2695
2696 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2697 break;
2698
2699 default:
2700 break;
2701 }
2702
2703 /* For all possible libcalls in BFmode, record NULL. */
2704 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2705 {
2706 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2707 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2708 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2709 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2710 }
2711 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2712
2713 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2714 {
2715 const arm_fixed_mode_set fixed_arith_modes[] =
2716 {
2717 { E_QQmode, "qq" },
2718 { E_UQQmode, "uqq" },
2719 { E_HQmode, "hq" },
2720 { E_UHQmode, "uhq" },
2721 { E_SQmode, "sq" },
2722 { E_USQmode, "usq" },
2723 { E_DQmode, "dq" },
2724 { E_UDQmode, "udq" },
2725 { E_TQmode, "tq" },
2726 { E_UTQmode, "utq" },
2727 { E_HAmode, "ha" },
2728 { E_UHAmode, "uha" },
2729 { E_SAmode, "sa" },
2730 { E_USAmode, "usa" },
2731 { E_DAmode, "da" },
2732 { E_UDAmode, "uda" },
2733 { E_TAmode, "ta" },
2734 { E_UTAmode, "uta" }
2735 };
2736 const arm_fixed_mode_set fixed_conv_modes[] =
2737 {
2738 { E_QQmode, "qq" },
2739 { E_UQQmode, "uqq" },
2740 { E_HQmode, "hq" },
2741 { E_UHQmode, "uhq" },
2742 { E_SQmode, "sq" },
2743 { E_USQmode, "usq" },
2744 { E_DQmode, "dq" },
2745 { E_UDQmode, "udq" },
2746 { E_TQmode, "tq" },
2747 { E_UTQmode, "utq" },
2748 { E_HAmode, "ha" },
2749 { E_UHAmode, "uha" },
2750 { E_SAmode, "sa" },
2751 { E_USAmode, "usa" },
2752 { E_DAmode, "da" },
2753 { E_UDAmode, "uda" },
2754 { E_TAmode, "ta" },
2755 { E_UTAmode, "uta" },
2756 { E_QImode, "qi" },
2757 { E_HImode, "hi" },
2758 { E_SImode, "si" },
2759 { E_DImode, "di" },
2760 { E_TImode, "ti" },
2761 { E_SFmode, "sf" },
2762 { E_DFmode, "df" }
2763 };
2764 unsigned int i, j;
2765
2766 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2767 {
2768 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2769 "add", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2771 "ssadd", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2773 "usadd", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2775 "sub", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2777 "sssub", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2779 "ussub", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2781 "mul", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2783 "ssmul", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2785 "usmul", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2787 "div", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2789 "udiv", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2791 "ssdiv", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2793 "usdiv", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2795 "neg", fixed_arith_modes[i].name, 2);
2796 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2797 "ssneg", fixed_arith_modes[i].name, 2);
2798 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2799 "usneg", fixed_arith_modes[i].name, 2);
2800 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2801 "ashl", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2803 "ashr", fixed_arith_modes[i].name, 3);
2804 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2805 "lshr", fixed_arith_modes[i].name, 3);
2806 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2807 "ssashl", fixed_arith_modes[i].name, 3);
2808 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2809 "usashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2811 "cmp", fixed_arith_modes[i].name, 2);
2812 }
2813
2814 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2815 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2816 {
2817 if (i == j
2818 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2819 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2820 continue;
2821
2822 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2823 fixed_conv_modes[j].mode, "fract",
2824 fixed_conv_modes[i].name,
2825 fixed_conv_modes[j].name);
2826 arm_set_fixed_conv_libfunc (satfract_optab,
2827 fixed_conv_modes[i].mode,
2828 fixed_conv_modes[j].mode, "satfract",
2829 fixed_conv_modes[i].name,
2830 fixed_conv_modes[j].name);
2831 arm_set_fixed_conv_libfunc (fractuns_optab,
2832 fixed_conv_modes[i].mode,
2833 fixed_conv_modes[j].mode, "fractuns",
2834 fixed_conv_modes[i].name,
2835 fixed_conv_modes[j].name);
2836 arm_set_fixed_conv_libfunc (satfractuns_optab,
2837 fixed_conv_modes[i].mode,
2838 fixed_conv_modes[j].mode, "satfractuns",
2839 fixed_conv_modes[i].name,
2840 fixed_conv_modes[j].name);
2841 }
2842 }
2843
2844 if (TARGET_AAPCS_BASED)
2845 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2846
2847 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2848 }
2849
2850 /* On AAPCS systems, this is the "struct __va_list". */
2851 static GTY(()) tree va_list_type;
2852
2853 /* Return the type to use as __builtin_va_list. */
2854 static tree
2855 arm_build_builtin_va_list (void)
2856 {
2857 tree va_list_name;
2858 tree ap_field;
2859
2860 if (!TARGET_AAPCS_BASED)
2861 return std_build_builtin_va_list ();
2862
2863 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2864 defined as:
2865
2866 struct __va_list
2867 {
2868 void *__ap;
2869 };
2870
2871 The C Library ABI further reinforces this definition in \S
2872 4.1.
2873
2874 We must follow this definition exactly. The structure tag
2875 name is visible in C++ mangled names, and thus forms a part
2876 of the ABI. The field name may be used by people who
2877 #include <stdarg.h>. */
2878 /* Create the type. */
2879 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2880 /* Give it the required name. */
2881 va_list_name = build_decl (BUILTINS_LOCATION,
2882 TYPE_DECL,
2883 get_identifier ("__va_list"),
2884 va_list_type);
2885 DECL_ARTIFICIAL (va_list_name) = 1;
2886 TYPE_NAME (va_list_type) = va_list_name;
2887 TYPE_STUB_DECL (va_list_type) = va_list_name;
2888 /* Create the __ap field. */
2889 ap_field = build_decl (BUILTINS_LOCATION,
2890 FIELD_DECL,
2891 get_identifier ("__ap"),
2892 ptr_type_node);
2893 DECL_ARTIFICIAL (ap_field) = 1;
2894 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2895 TYPE_FIELDS (va_list_type) = ap_field;
2896 /* Compute its layout. */
2897 layout_type (va_list_type);
2898
2899 return va_list_type;
2900 }
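/* At the source level the type built above corresponds to

     typedef struct __va_list { void *__ap; } va_list;

   i.e. on AAPCS targets a va_list is a one-word struct wrapping the plain
   argument pointer that the standard (std_) va_arg machinery expects.  */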
2901
2902 /* Return an expression of type "void *" pointing to the next
2903 available argument in a variable-argument list. VALIST is the
2904 user-level va_list object, of type __builtin_va_list. */
2905 static tree
2906 arm_extract_valist_ptr (tree valist)
2907 {
2908 if (TREE_TYPE (valist) == error_mark_node)
2909 return error_mark_node;
2910
2911 /* On an AAPCS target, the pointer is stored within "struct
2912 va_list". */
2913 if (TARGET_AAPCS_BASED)
2914 {
2915 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2916 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2917 valist, ap_field, NULL_TREE);
2918 }
2919
2920 return valist;
2921 }
2922
2923 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2924 static void
2925 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2926 {
2927 valist = arm_extract_valist_ptr (valist);
2928 std_expand_builtin_va_start (valist, nextarg);
2929 }
2930
2931 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2932 static tree
2933 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2934 gimple_seq *post_p)
2935 {
2936 valist = arm_extract_valist_ptr (valist);
2937 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2938 }
2939
2940 /* Check any incompatible options that the user has specified. */
2941 static void
2942 arm_option_check_internal (struct gcc_options *opts)
2943 {
2944 int flags = opts->x_target_flags;
2945
2946 /* iWMMXt and NEON are incompatible. */
2947 if (TARGET_IWMMXT
2948 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2949 error ("iWMMXt and NEON are incompatible");
2950
2951 /* Make sure that the processor choice does not conflict with any of the
2952 other command line choices. */
2953 if (TARGET_ARM_P (flags)
2954 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2955 error ("target CPU does not support ARM mode");
2956
2957 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2958 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2959 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2960
2961 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2962 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2963
2964 /* If this target is normally configured to use APCS frames, warn if they
2965 are turned off and debugging is turned on. */
2966 if (TARGET_ARM_P (flags)
2967 && write_symbols != NO_DEBUG
2968 && !TARGET_APCS_FRAME
2969 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2970 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2971 "debugging");
2972
2973 /* iWMMXt unsupported under Thumb mode. */
2974 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2975 error ("iWMMXt unsupported under Thumb mode");
2976
2977 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2978 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2979
2980 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2981 {
2982 error ("RTP PIC is incompatible with Thumb");
2983 flag_pic = 0;
2984 }
2985
2986 if (target_pure_code || target_slow_flash_data)
2987 {
2988 const char *flag = (target_pure_code ? "-mpure-code" :
2989 "-mslow-flash-data");
2990 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2991
2992 /* We only support -mslow-flash-data on M-profile targets with
2993 MOVT. */
2994 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2995 error ("%s only supports non-pic code on M-profile targets with the "
2996 "MOVT instruction", flag);
2997
2998 /* We only support -mpure-code on M-profile targets. */
2999 if (target_pure_code && common_unsupported_modes)
3000 error ("%s only supports non-pic code on M-profile targets", flag);
3001
3002 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3003 -mword-relocations forbids relocation of MOVT/MOVW. */
3004 if (target_word_relocations)
3005 error ("%s incompatible with %<-mword-relocations%>", flag);
3006 }
3007 }
3008
3009 /* Recompute the global settings depending on target attribute options. */
3010
3011 static void
3012 arm_option_params_internal (void)
3013 {
3014 /* If we are not using the default (ARM mode) section anchor offset
3015 ranges, then set the correct ranges now. */
3016 if (TARGET_THUMB1)
3017 {
3018 /* Thumb-1 LDR instructions cannot have negative offsets.
3019 Permissible positive offset ranges are 5-bit (for byte loads),
3020 6-bit (for halfword loads), or 7-bit (for word loads).
3021 Empirical results suggest a 7-bit anchor range gives the best
3022 overall code size. */
3023 targetm.min_anchor_offset = 0;
3024 targetm.max_anchor_offset = 127;
3025 }
3026 else if (TARGET_THUMB2)
3027 {
3028 /* The minimum is set such that the total size of the block
3029 for a particular anchor is 248 + 1 + 4095 bytes, which is
3030 divisible by eight, ensuring natural spacing of anchors. */
3031 targetm.min_anchor_offset = -248;
3032 targetm.max_anchor_offset = 4095;
3033 }
3034 else
3035 {
3036 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3037 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3038 }
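/* For the Thumb-2 case above, the covered block really is a multiple of
   eight bytes: 248 + 1 + 4095 = 4344 = 8 * 543.  */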
3039
3040 /* Increase the number of conditional instructions with -Os. */
3041 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3042
3043 /* For THUMB2, we limit the conditional sequence to one IT block. */
3044 if (TARGET_THUMB2)
3045 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3046
3047 if (TARGET_THUMB1)
3048 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3049 else
3050 targetm.md_asm_adjust = arm_md_asm_adjust;
3051 }
3052
3053 /* True if -mflip-thumb should next add an attribute for the default
3054 mode, false if it should next add an attribute for the opposite mode. */
3055 static GTY(()) bool thumb_flipper;
3056
3057 /* Options after initial target override. */
3058 static GTY(()) tree init_optimize;
3059
3060 static void
3061 arm_override_options_after_change_1 (struct gcc_options *opts,
3062 struct gcc_options *opts_set)
3063 {
3064 /* -falign-functions without argument: supply one. */
3065 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3066 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3067 && opts->x_optimize_size ? "2" : "4";
3068 }
3069
3070 /* Implement targetm.override_options_after_change. */
3071
3072 static void
3073 arm_override_options_after_change (void)
3074 {
3075 arm_override_options_after_change_1 (&global_options, &global_options_set);
3076 }
3077
3078 /* Implement TARGET_OPTION_RESTORE. */
3079 static void
3080 arm_option_restore (struct gcc_options */* opts */,
3081 struct gcc_options */* opts_set */,
3082 struct cl_target_option *ptr)
3083 {
3084 arm_configure_build_target (&arm_active_target, ptr, false);
3085 arm_option_reconfigure_globals ();
3086 }
3087
3088 /* Reset options between modes that the user has specified. */
3089 static void
3090 arm_option_override_internal (struct gcc_options *opts,
3091 struct gcc_options *opts_set)
3092 {
3093 arm_override_options_after_change_1 (opts, opts_set);
3094
3095 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3096 {
3097 /* The default is to enable interworking, so this warning message would
3098 be confusing to users who have just compiled with
3099 eg, -march=armv4. */
3100 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3101 opts->x_target_flags &= ~MASK_INTERWORK;
3102 }
3103
3104 if (TARGET_THUMB_P (opts->x_target_flags)
3105 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3106 {
3107 warning (0, "target CPU does not support THUMB instructions");
3108 opts->x_target_flags &= ~MASK_THUMB;
3109 }
3110
3111 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3112 {
3113 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3114 opts->x_target_flags &= ~MASK_APCS_FRAME;
3115 }
3116
3117 /* Callee super interworking implies thumb interworking. Adding
3118 this to the flags here simplifies the logic elsewhere. */
3119 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3120 opts->x_target_flags |= MASK_INTERWORK;
3121
3122 /* Need to remember initial values so combinations of options like
3123 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3124 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3125
3126 if (! opts_set->x_arm_restrict_it)
3127 opts->x_arm_restrict_it = arm_arch8;
3128
3129 /* ARM execution state and M profile don't have [restrict] IT. */
3130 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3131 opts->x_arm_restrict_it = 0;
3132
3133 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3134 if (!opts_set->x_arm_restrict_it
3135 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3136 opts->x_arm_restrict_it = 0;
3137
3138 /* Enable -munaligned-access by default for
3139 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3140 i.e. Thumb2 and ARM state only.
3141 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3142 - ARMv8 architecture-based processors.
3143
3144 Disable -munaligned-access by default for
3145 - all pre-ARMv6 architecture-based processors
3146 - ARMv6-M architecture-based processors
3147 - ARMv8-M Baseline processors. */
3148
3149 if (! opts_set->x_unaligned_access)
3150 {
3151 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3152 && arm_arch6 && (arm_arch_notm || arm_arch7));
3153 }
3154 else if (opts->x_unaligned_access == 1
3155 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3156 {
3157 warning (0, "target CPU does not support unaligned accesses");
3158 opts->x_unaligned_access = 0;
3159 }
3160
3161 /* Don't warn since it's on by default in -O2. */
3162 if (TARGET_THUMB1_P (opts->x_target_flags))
3163 opts->x_flag_schedule_insns = 0;
3164 else
3165 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3166
3167 /* Disable shrink-wrap when optimizing function for size, since it tends to
3168 generate additional returns. */
3169 if (optimize_function_for_size_p (cfun)
3170 && TARGET_THUMB2_P (opts->x_target_flags))
3171 opts->x_flag_shrink_wrap = false;
3172 else
3173 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3174
3175 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3176 - epilogue_insns - does not accurately model the corresponding insns
3177 emitted in the asm file. In particular, see the comment in thumb_exit
3178 'Find out how many of the (return) argument registers we can corrupt'.
3179 As a consequence, the epilogue may clobber registers without fipa-ra
3180 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3181 TODO: Accurately model clobbers for epilogue_insns and reenable
3182 fipa-ra. */
3183 if (TARGET_THUMB1_P (opts->x_target_flags))
3184 opts->x_flag_ipa_ra = 0;
3185 else
3186 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3187
3188 /* Thumb2 inline assembly code should always use unified syntax.
3189 This will apply to ARM and Thumb1 eventually. */
3190 if (TARGET_THUMB2_P (opts->x_target_flags))
3191 opts->x_inline_asm_unified = true;
3192
3193 if (arm_stack_protector_guard == SSP_GLOBAL
3194 && opts->x_arm_stack_protector_guard_offset_str)
3195 {
3196 error ("incompatible options %<-mstack-protector-guard=global%> and "
3197 "%<-mstack-protector-guard-offset=%s%>",
3198 arm_stack_protector_guard_offset_str);
3199 }
3200
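/* The offset is parsed by hand with strtol so that any base prefix is
   accepted, e.g. -mstack-protector-guard-offset=0x20 or =32 (illustrative
   values).  */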
3201 if (opts->x_arm_stack_protector_guard_offset_str)
3202 {
3203 char *end;
3204 const char *str = arm_stack_protector_guard_offset_str;
3205 errno = 0;
3206 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3207 if (!*str || *end || errno)
3208 error ("%qs is not a valid offset in %qs", str,
3209 "-mstack-protector-guard-offset=");
3210 arm_stack_protector_guard_offset = offs;
3211 }
3212
3213 if (arm_current_function_pac_enabled_p ())
3214 {
3215 if (!arm_arch8m_main)
3216 error ("This architecture does not support branch protection "
3217 "instructions");
3218 if (TARGET_TPCS_FRAME)
3219 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3220 }
3221
3222 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3223 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3224 #endif
3225 }
3226
3227 static sbitmap isa_all_fpubits_internal;
3228 static sbitmap isa_all_fpbits;
3229 static sbitmap isa_quirkbits;
3230
3231 /* Configure a build target TARGET from the user-specified options OPTS and
3232 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3233 architecture have been specified, but the two are not identical. */
3234 void
3235 arm_configure_build_target (struct arm_build_target *target,
3236 struct cl_target_option *opts,
3237 bool warn_compatible)
3238 {
3239 const cpu_option *arm_selected_tune = NULL;
3240 const arch_option *arm_selected_arch = NULL;
3241 const cpu_option *arm_selected_cpu = NULL;
3242 const arm_fpu_desc *arm_selected_fpu = NULL;
3243 const char *tune_opts = NULL;
3244 const char *arch_opts = NULL;
3245 const char *cpu_opts = NULL;
3246
3247 bitmap_clear (target->isa);
3248 target->core_name = NULL;
3249 target->arch_name = NULL;
3250
3251 if (opts->x_arm_arch_string)
3252 {
3253 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3254 "-march",
3255 opts->x_arm_arch_string);
3256 arch_opts = strchr (opts->x_arm_arch_string, '+');
3257 }
3258
3259 if (opts->x_arm_cpu_string)
3260 {
3261 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3262 opts->x_arm_cpu_string);
3263 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3264 arm_selected_tune = arm_selected_cpu;
3265 /* If taking the tuning from -mcpu, we don't need to rescan the
3266 options for tuning. */
3267 }
3268
3269 if (opts->x_arm_tune_string)
3270 {
3271 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3272 opts->x_arm_tune_string);
3273 tune_opts = strchr (opts->x_arm_tune_string, '+');
3274 }
3275
3276 if (opts->x_arm_branch_protection_string)
3277 {
3278 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3279
3280 if (aarch_ra_sign_key != AARCH_KEY_A)
3281 {
3282 warning (0, "invalid key type for %<-mbranch-protection=%>");
3283 aarch_ra_sign_key = AARCH_KEY_A;
3284 }
3285 }
3286
3287 if (arm_selected_arch)
3288 {
3289 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3290 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3291 arch_opts);
3292
3293 if (arm_selected_cpu)
3294 {
3295 auto_sbitmap cpu_isa (isa_num_bits);
3296 auto_sbitmap isa_delta (isa_num_bits);
3297
3298 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3299 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3300 cpu_opts);
3301 bitmap_xor (isa_delta, cpu_isa, target->isa);
3302 /* Ignore any bits that are quirk bits. */
3303 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3304 /* If the user (or the default configuration) has specified a
3305 specific FPU, then ignore any bits that depend on the FPU
3306 configuration. Do similarly if using the soft-float
3307 ABI. */
3308 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3309 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3310 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3311
3312 if (!bitmap_empty_p (isa_delta))
3313 {
3314 if (warn_compatible)
3315 warning (0, "switch %<-mcpu=%s%> conflicts "
3316 "with switch %<-march=%s%>",
3317 opts->x_arm_cpu_string,
3318 opts->x_arm_arch_string);
3319
3320 /* -march wins for code generation.
3321 -mcpu wins for default tuning. */
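/* An illustrative combination: with -mcpu=cortex-a8 -march=armv7-m, code
   is generated for armv7-m while the cortex-a8 tuning tables are kept.  */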
3322 if (!arm_selected_tune)
3323 arm_selected_tune = arm_selected_cpu;
3324
3325 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3326 target->arch_name = arm_selected_arch->common.name;
3327 }
3328 else
3329 {
3330 /* Architecture and CPU are essentially the same.
3331 Prefer the CPU setting. */
3332 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3333 target->core_name = arm_selected_cpu->common.name;
3334 /* Copy the CPU's capabilities, so that we inherit the
3335 appropriate extensions and quirks. */
3336 bitmap_copy (target->isa, cpu_isa);
3337 }
3338 }
3339 else
3340 {
3341 /* Pick a CPU based on the architecture. */
3342 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3343 target->arch_name = arm_selected_arch->common.name;
3344 /* Note: target->core_name is left unset in this path. */
3345 }
3346 }
3347 else if (arm_selected_cpu)
3348 {
3349 target->core_name = arm_selected_cpu->common.name;
3350 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3351 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3352 cpu_opts);
3353 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3354 }
3355 /* If the user did not specify a processor or architecture, choose
3356 one for them. */
3357 else
3358 {
3359 const cpu_option *sel;
3360 auto_sbitmap sought_isa (isa_num_bits);
3361 bitmap_clear (sought_isa);
3362 auto_sbitmap default_isa (isa_num_bits);
3363
3364 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3365 TARGET_CPU_DEFAULT);
3366 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3367 gcc_assert (arm_selected_cpu->common.name);
3368
3369 /* RWE: All of the selection logic below (to the end of this
3370 'if' clause) looks somewhat suspect. It appears to be mostly
3371 there to support forcing thumb support when the default CPU
3372 does not have thumb (somewhat dubious in terms of what the
3373 user might be expecting). I think it should be removed once
3374 support for the pre-thumb era cores is removed. */
3375 sel = arm_selected_cpu;
3376 arm_initialize_isa (default_isa, sel->common.isa_bits);
3377 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3378 cpu_opts);
3379
3380 /* Now check to see if the user has specified any command line
3381 switches that require certain abilities from the cpu. */
3382
3383 if (TARGET_INTERWORK || TARGET_THUMB)
3384 bitmap_set_bit (sought_isa, isa_bit_thumb);
3385
3386 /* If there are such requirements and the default CPU does not
3387 satisfy them, we need to run over the complete list of
3388 cores looking for one that is satisfactory. */
3389 if (!bitmap_empty_p (sought_isa)
3390 && !bitmap_subset_p (sought_isa, default_isa))
3391 {
3392 auto_sbitmap candidate_isa (isa_num_bits);
3393 /* We're only interested in a CPU with at least the
3394 capabilities of the default CPU and the required
3395 additional features. */
3396 bitmap_ior (default_isa, default_isa, sought_isa);
3397
3398 /* Try to locate a CPU type that supports all of the abilities
3399 of the default CPU, plus the extra abilities requested by
3400 the user. */
3401 for (sel = all_cores; sel->common.name != NULL; sel++)
3402 {
3403 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3404 /* An exact match? */
3405 if (bitmap_equal_p (default_isa, candidate_isa))
3406 break;
3407 }
3408
3409 if (sel->common.name == NULL)
3410 {
3411 unsigned current_bit_count = isa_num_bits;
3412 const cpu_option *best_fit = NULL;
3413
3414 /* Ideally we would like to issue an error message here
3415 saying that it was not possible to find a CPU compatible
3416 with the default CPU, but which also supports the command
3417 line options specified by the programmer, and so they
3418 ought to use the -mcpu=<name> command line option to
3419 override the default CPU type.
3420
3421 If we cannot find a CPU that has exactly the
3422 characteristics of the default CPU and the given
3423 command line options we scan the array again looking
3424 for a best match. The best match must have at least
3425 the capabilities of the perfect match. */
3426 for (sel = all_cores; sel->common.name != NULL; sel++)
3427 {
3428 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3429
3430 if (bitmap_subset_p (default_isa, candidate_isa))
3431 {
3432 unsigned count;
3433
3434 bitmap_and_compl (candidate_isa, candidate_isa,
3435 default_isa);
3436 count = bitmap_popcount (candidate_isa);
3437
3438 if (count < current_bit_count)
3439 {
3440 best_fit = sel;
3441 current_bit_count = count;
3442 }
3443 }
3444
3445 gcc_assert (best_fit);
3446 sel = best_fit;
3447 }
3448 }
3449 arm_selected_cpu = sel;
3450 }
3451
3452 /* Now we know the CPU, we can finally initialize the target
3453 structure. */
3454 target->core_name = arm_selected_cpu->common.name;
3455 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3456 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3457 cpu_opts);
3458 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3459 }
3460
3461 gcc_assert (arm_selected_cpu);
3462 gcc_assert (arm_selected_arch);
3463
3464 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3465 {
3466 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3467 auto_sbitmap fpu_bits (isa_num_bits);
3468
3469 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3470 /* This should clear out ALL bits relating to the FPU/simd
3471 extensions, to avoid potentially invalid combinations later on
3472 that we can't match. At present we only clear out those bits
3473 that can be set by -mfpu. This should be fixed in GCC-12. */
3474 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3475 bitmap_ior (target->isa, target->isa, fpu_bits);
3476 }
3477
3478 /* If we have the soft-float ABI, clear any feature bits relating to use of
3479 floating-point operations. They'll just confuse things later on. */
3480 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3481 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3482
3483 /* There may be implied bits which we still need to enable. These are
3484 non-named features which are needed to complete other sets of features,
3485 but cannot be enabled from arm-cpus.in due to being shared between
3486 multiple fgroups. Each entry in all_implied_fbits is of the form
3487 ante -> cons, meaning that if the feature "ante" is enabled, we should
3488 implicitly enable "cons". */
3489 const struct fbit_implication *impl = all_implied_fbits;
3490 while (impl->ante)
3491 {
3492 if (bitmap_bit_p (target->isa, impl->ante))
3493 bitmap_set_bit (target->isa, impl->cons);
3494 impl++;
3495 }
3496
3497 if (!arm_selected_tune)
3498 arm_selected_tune = arm_selected_cpu;
3499 else /* Validate the features passed to -mtune. */
3500 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3501
3502 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3503
3504 /* Finish initializing the target structure. */
3505 if (!target->arch_name)
3506 target->arch_name = arm_selected_arch->common.name;
3507 target->arch_pp_name = arm_selected_arch->arch;
3508 target->base_arch = arm_selected_arch->base_arch;
3509 target->profile = arm_selected_arch->profile;
3510
3511 target->tune_flags = tune_data->tune_flags;
3512 target->tune = tune_data->tune;
3513 target->tune_core = tune_data->scheduler;
3514 }
3515
3516 /* Fix up any incompatible options that the user has specified. */
3517 static void
3518 arm_option_override (void)
3519 {
3520 static const enum isa_feature fpu_bitlist_internal[]
3521 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3522 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3523 static const enum isa_feature fp_bitlist[]
3524 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3525 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3526 cl_target_option opts;
3527
3528 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3529 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3530
3531 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3532 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3533 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3534 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3535
3536 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3537
3538 if (!OPTION_SET_P (arm_fpu_index))
3539 {
3540 bool ok;
3541 int fpu_index;
3542
3543 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3544 CL_TARGET);
3545 gcc_assert (ok);
3546 arm_fpu_index = (enum fpu_type) fpu_index;
3547 }
3548
3549 cl_target_option_save (&opts, &global_options, &global_options_set);
3550 arm_configure_build_target (&arm_active_target, &opts, true);
3551
3552 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3553 SUBTARGET_OVERRIDE_OPTIONS;
3554 #endif
3555
3556 /* Initialize boolean versions of the architectural flags, for use
3557 in the arm.md file and for enabling feature flags. */
3558 arm_option_reconfigure_globals ();
3559
3560 arm_tune = arm_active_target.tune_core;
3561 tune_flags = arm_active_target.tune_flags;
3562 current_tune = arm_active_target.tune;
3563
3564 /* TBD: Dwarf info for apcs frame is not handled yet. */
3565 if (TARGET_APCS_FRAME)
3566 flag_shrink_wrap = false;
3567
3568 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3569 {
3570 warning (0, "%<-mapcs-stack-check%> incompatible with "
3571 "%<-mno-apcs-frame%>");
3572 target_flags |= MASK_APCS_FRAME;
3573 }
3574
3575 if (TARGET_POKE_FUNCTION_NAME)
3576 target_flags |= MASK_APCS_FRAME;
3577
3578 if (TARGET_APCS_REENT && flag_pic)
3579 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3580
3581 if (TARGET_APCS_REENT)
3582 warning (0, "APCS reentrant code not supported. Ignored");
3583
3584 /* Set up some tuning parameters. */
3585 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3586 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3587 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3588 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3589 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3590 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3591
3592 /* For arm2/3 there is no need to do any scheduling if we are doing
3593 software floating-point. */
3594 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3595 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3596
3597 /* Override the default structure alignment for AAPCS ABI. */
3598 if (!OPTION_SET_P (arm_structure_size_boundary))
3599 {
3600 if (TARGET_AAPCS_BASED)
3601 arm_structure_size_boundary = 8;
3602 }
3603 else
3604 {
3605 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3606
3607 if (arm_structure_size_boundary != 8
3608 && arm_structure_size_boundary != 32
3609 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3610 {
3611 if (ARM_DOUBLEWORD_ALIGN)
3612 warning (0,
3613 "structure size boundary can only be set to 8, 32 or 64");
3614 else
3615 warning (0, "structure size boundary can only be set to 8 or 32");
3616 arm_structure_size_boundary
3617 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3618 }
3619 }
3620
3621 if (TARGET_VXWORKS_RTP)
3622 {
3623 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3624 arm_pic_data_is_text_relative = 0;
3625 }
3626 else if (flag_pic
3627 && !arm_pic_data_is_text_relative
3628 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3629 /* When text & data segments don't have a fixed displacement, the
3630 intended use is with a single, read only, pic base register.
3631 Unless the user explicitly requested not to do that, set
3632 it. */
3633 target_flags |= MASK_SINGLE_PIC_BASE;
3634
3635 /* If stack checking is disabled, we can use r10 as the PIC register,
3636 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3637 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3638 {
3639 if (TARGET_VXWORKS_RTP)
3640 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3641 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3642 }
3643
3644 if (flag_pic && TARGET_VXWORKS_RTP)
3645 arm_pic_register = 9;
3646
3647 /* If in FDPIC mode then force arm_pic_register to be r9. */
3648 if (TARGET_FDPIC)
3649 {
3650 arm_pic_register = FDPIC_REGNUM;
3651 if (TARGET_THUMB1)
3652 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3653 }
3654
3655 if (arm_pic_register_string != NULL)
3656 {
3657 int pic_register = decode_reg_name (arm_pic_register_string);
3658
3659 if (!flag_pic)
3660 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3661
3662 /* Prevent the user from choosing an obviously stupid PIC register. */
3663 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3664 || pic_register == HARD_FRAME_POINTER_REGNUM
3665 || pic_register == STACK_POINTER_REGNUM
3666 || pic_register >= PC_REGNUM
3667 || (TARGET_VXWORKS_RTP
3668 && (unsigned int) pic_register != arm_pic_register))
3669 error ("unable to use %qs for PIC register", arm_pic_register_string);
3670 else
3671 arm_pic_register = pic_register;
3672 }
3673
3674 if (flag_pic)
3675 target_word_relocations = 1;
3676
3677 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3678 if (fix_cm3_ldrd == 2)
3679 {
3680 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3681 fix_cm3_ldrd = 1;
3682 else
3683 fix_cm3_ldrd = 0;
3684 }
3685
3686 /* Enable fix_vlldm by default if required. */
3687 if (fix_vlldm == 2)
3688 {
3689 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3690 fix_vlldm = 1;
3691 else
3692 fix_vlldm = 0;
3693 }
3694
3695 /* Enable fix_aes by default if required. */
3696 if (fix_aes_erratum_1742098 == 2)
3697 {
3698 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3699 fix_aes_erratum_1742098 = 1;
3700 else
3701 fix_aes_erratum_1742098 = 0;
3702 }
3703
3704 /* Hot/Cold partitioning is not currently supported, since we can't
3705 handle literal pool placement in that case. */
3706 if (flag_reorder_blocks_and_partition)
3707 {
3708 inform (input_location,
3709 "%<-freorder-blocks-and-partition%> not supported "
3710 "on this architecture");
3711 flag_reorder_blocks_and_partition = 0;
3712 flag_reorder_blocks = 1;
3713 }
3714
3715 if (flag_pic)
3716 /* Hoisting PIC address calculations more aggressively provides a small,
3717 but measurable, size reduction for PIC code. Therefore, we decrease
3718 the bar for unrestricted expression hoisting to the cost of PIC address
3719 calculation, which is 2 instructions. */
3720 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3721 param_gcse_unrestricted_cost, 2);
3722
3723 /* ARM EABI defaults to strict volatile bitfields. */
3724 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3725 && abi_version_at_least(2))
3726 flag_strict_volatile_bitfields = 1;
3727
3728 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3729 have deemed it beneficial (signified by setting
3730 prefetch.num_slots to 1 or more). */
3731 if (flag_prefetch_loop_arrays < 0
3732 && HAVE_prefetch
3733 && optimize >= 3
3734 && current_tune->prefetch.num_slots > 0)
3735 flag_prefetch_loop_arrays = 1;
3736
3737 /* Set up parameters to be used in prefetching algorithm. Do not
3738 override the defaults unless we are tuning for a core we have
3739 researched values for. */
3740 if (current_tune->prefetch.num_slots > 0)
3741 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3742 param_simultaneous_prefetches,
3743 current_tune->prefetch.num_slots);
3744 if (current_tune->prefetch.l1_cache_line_size >= 0)
3745 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3746 param_l1_cache_line_size,
3747 current_tune->prefetch.l1_cache_line_size);
3748 if (current_tune->prefetch.l1_cache_line_size >= 0)
3749 {
3750 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3751 param_destruct_interfere_size,
3752 current_tune->prefetch.l1_cache_line_size);
3753 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3754 param_construct_interfere_size,
3755 current_tune->prefetch.l1_cache_line_size);
3756 }
3757 else
3758 {
3759 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3760 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3761 constructive? */
3762 /* More recent Cortex chips have a 64-byte cache line, but are marked
3763 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3764 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3765 param_destruct_interfere_size, 64);
3766 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3767 param_construct_interfere_size, 64);
3768 }
3769
3770 if (current_tune->prefetch.l1_cache_size >= 0)
3771 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3772 param_l1_cache_size,
3773 current_tune->prefetch.l1_cache_size);
3774
3775 /* Look through ready list and all of queue for instructions
3776 relevant for L2 auto-prefetcher. */
3777 int sched_autopref_queue_depth;
3778
3779 switch (current_tune->sched_autopref)
3780 {
3781 case tune_params::SCHED_AUTOPREF_OFF:
3782 sched_autopref_queue_depth = -1;
3783 break;
3784
3785 case tune_params::SCHED_AUTOPREF_RANK:
3786 sched_autopref_queue_depth = 0;
3787 break;
3788
3789 case tune_params::SCHED_AUTOPREF_FULL:
3790 sched_autopref_queue_depth = max_insn_queue_index + 1;
3791 break;
3792
3793 default:
3794 gcc_unreachable ();
3795 }
3796
3797 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3798 param_sched_autopref_queue_depth,
3799 sched_autopref_queue_depth);
3800
3801 /* Currently, for slow flash data, we just disable literal pools. We also
3802 disable it for pure-code. */
3803 if (target_slow_flash_data || target_pure_code)
3804 arm_disable_literal_pool = true;
3805
3806 /* Disable scheduling fusion by default if it's not an armv7 processor
3807 or doesn't prefer ldrd/strd. */
3808 if (flag_schedule_fusion == 2
3809 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3810 flag_schedule_fusion = 0;
3811
3812 /* Need to remember initial options before they are overridden. */
3813 init_optimize = build_optimization_node (&global_options,
3814 &global_options_set);
3815
3816 arm_options_perform_arch_sanity_checks ();
3817 arm_option_override_internal (&global_options, &global_options_set);
3818 arm_option_check_internal (&global_options);
3819 arm_option_params_internal ();
3820
3821 /* Create the default target_options structure. */
3822 target_option_default_node = target_option_current_node
3823 = build_target_option_node (&global_options, &global_options_set);
3824
3825 /* Register global variables with the garbage collector. */
3826 arm_add_gc_roots ();
3827
3828 /* Init initial mode for testing. */
3829 thumb_flipper = TARGET_THUMB;
3830 }
3831
3832
3833 /* Reconfigure global status flags from the active_target.isa. */
3834 void
3835 arm_option_reconfigure_globals (void)
3836 {
3837 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3838 arm_base_arch = arm_active_target.base_arch;
3839
3840 /* Initialize boolean versions of the architectural flags, for use
3841 in the arm.md file. */
3842 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3843 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3844 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3845 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3846 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3847 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3848 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3849 arm_arch6m = arm_arch6 && !arm_arch_notm;
3850 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3851 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3852 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3853 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3854 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3855 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3856 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3857 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3858 isa_bit_armv8_1m_main);
3859 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3860 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3861 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3862 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3863 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3864 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3865 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3866 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3867 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3868 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3869 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3870 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3871 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3872
3873 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3874 if (arm_fp16_inst)
3875 {
3876 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3877 error ("selected fp16 options are incompatible");
3878 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3879 }
3880
3881 arm_arch_cde = 0;
3882 arm_arch_cde_coproc = 0;
3883 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3884 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3885 isa_bit_cdecp6, isa_bit_cdecp7};
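/* e.g. -march=armv8.1-m.main+cdecp0 (an illustrative command line) sets
   the bit for coprocessor 0 in arm_arch_cde_coproc below.  */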
3886 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3887 {
3888 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3889 if (cde_bit)
3890 {
3891 arm_arch_cde |= cde_bit;
3892 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3893 }
3894 }
3895
3896 /* And finally, set up some quirks. */
3897 arm_arch_no_volatile_ce
3898 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3899 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3900 isa_bit_quirk_armv6kz);
3901
3902 /* Use the cp15 method if it is available. */
3903 if (target_thread_pointer == TP_AUTO)
3904 {
3905 if (arm_arch6k && !TARGET_THUMB1)
3906 target_thread_pointer = TP_CP15;
3907 else
3908 target_thread_pointer = TP_SOFT;
3909 }
3910
3911 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3912 error("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3913 }
3914
3915 /* Perform some validation between the desired architecture and the rest of the
3916 options. */
3917 void
3918 arm_options_perform_arch_sanity_checks (void)
3919 {
3920 /* V5T code we generate is completely interworking capable, so we turn off
3921 TARGET_INTERWORK here to avoid many tests later on. */
3922
3923 /* XXX However, we must pass the right pre-processor defines to CPP
3924 or GLD can get confused. This is a hack. */
3925 if (TARGET_INTERWORK)
3926 arm_cpp_interwork = 1;
3927
3928 if (arm_arch5t)
3929 target_flags &= ~MASK_INTERWORK;
3930
3931 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3932 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3933
3934 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3935 error ("iwmmxt abi requires an iwmmxt capable cpu");
3936
3937 /* BPABI targets use linker tricks to allow interworking on cores
3938 without thumb support. */
3939 if (TARGET_INTERWORK
3940 && !TARGET_BPABI
3941 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3942 {
3943 warning (0, "target CPU does not support interworking" );
3944 target_flags &= ~MASK_INTERWORK;
3945 }
3946
3947 /* If soft-float is specified then don't use FPU. */
3948 if (TARGET_SOFT_FLOAT)
3949 arm_fpu_attr = FPU_NONE;
3950 else
3951 arm_fpu_attr = FPU_VFP;
3952
3953 if (TARGET_AAPCS_BASED)
3954 {
3955 if (TARGET_CALLER_INTERWORKING)
3956 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3957 else
3958 if (TARGET_CALLEE_INTERWORKING)
3959 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3960 }
3961
3962 /* __fp16 support currently assumes the core has ldrh. */
3963 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3964 sorry ("%<__fp16%> and no ldrh");
3965
3966 if (use_cmse && !arm_arch_cmse)
3967 error ("target CPU does not support ARMv8-M Security Extensions");
3968
3969 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3970 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3971 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3972 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3973
3974
3975 if (TARGET_AAPCS_BASED)
3976 {
3977 if (arm_abi == ARM_ABI_IWMMXT)
3978 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3979 else if (TARGET_HARD_FLOAT_ABI)
3980 {
3981 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3982 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3983 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3984 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3985 }
3986 else
3987 arm_pcs_default = ARM_PCS_AAPCS;
3988 }
3989 else
3990 {
3991 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3992 sorry ("%<-mfloat-abi=hard%> and VFP");
3993
3994 if (arm_abi == ARM_ABI_APCS)
3995 arm_pcs_default = ARM_PCS_APCS;
3996 else
3997 arm_pcs_default = ARM_PCS_ATPCS;
3998 }
3999 }
4000
4001 /* Test whether a local function descriptor is canonical, i.e.,
4002 whether we can use GOTOFFFUNCDESC to compute the address of the
4003 function. */
4004 static bool
4005 arm_fdpic_local_funcdesc_p (rtx fnx)
4006 {
4007 tree fn;
4008 enum symbol_visibility vis;
4009 bool ret;
4010
4011 if (!TARGET_FDPIC)
4012 return true;
4013
4014 if (! SYMBOL_REF_LOCAL_P (fnx))
4015 return false;
4016
4017 fn = SYMBOL_REF_DECL (fnx);
4018
4019 if (! fn)
4020 return false;
4021
4022 vis = DECL_VISIBILITY (fn);
4023
4024 if (vis == VISIBILITY_PROTECTED)
4025 /* Private function descriptors for protected functions are not
4026 canonical. Temporarily change the visibility to global so that
4027 we can ensure uniqueness of funcdesc pointers. */
4028 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4029
4030 ret = default_binds_local_p_1 (fn, flag_pic);
4031
4032 DECL_VISIBILITY (fn) = vis;
4033
4034 return ret;
4035 }
4036
4037 static void
4038 arm_add_gc_roots (void)
4039 {
4040 gcc_obstack_init(&minipool_obstack);
4041 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4042 }
4043 \f
4044 /* A table of known ARM exception types.
4045 For use with the interrupt function attribute. */
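/* Typical usage of the attribute (an illustrative declaration, not part
   of this file):

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   The string argument is matched against the table below.  */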
4046
4047 typedef struct
4048 {
4049 const char *const arg;
4050 const unsigned long return_value;
4051 }
4052 isr_attribute_arg;
4053
4054 static const isr_attribute_arg isr_attribute_args [] =
4055 {
4056 { "IRQ", ARM_FT_ISR },
4057 { "irq", ARM_FT_ISR },
4058 { "FIQ", ARM_FT_FIQ },
4059 { "fiq", ARM_FT_FIQ },
4060 { "ABORT", ARM_FT_ISR },
4061 { "abort", ARM_FT_ISR },
4062 { "UNDEF", ARM_FT_EXCEPTION },
4063 { "undef", ARM_FT_EXCEPTION },
4064 { "SWI", ARM_FT_EXCEPTION },
4065 { "swi", ARM_FT_EXCEPTION },
4066 { NULL, ARM_FT_NORMAL }
4067 };
4068
4069 /* Returns the (interrupt) function type of the current
4070 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4071
4072 static unsigned long
4073 arm_isr_value (tree argument)
4074 {
4075 const isr_attribute_arg * ptr;
4076 const char * arg;
4077
4078 if (!arm_arch_notm)
4079 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4080
4081 /* No argument - default to IRQ. */
4082 if (argument == NULL_TREE)
4083 return ARM_FT_ISR;
4084
4085 /* Get the value of the argument. */
4086 if (TREE_VALUE (argument) == NULL_TREE
4087 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4088 return ARM_FT_UNKNOWN;
4089
4090 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4091
4092 /* Check it against the list of known arguments. */
4093 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4094 if (streq (arg, ptr->arg))
4095 return ptr->return_value;
4096
4097 /* An unrecognized interrupt type. */
4098 return ARM_FT_UNKNOWN;
4099 }
4100
4101 /* Computes the type of the current function. */
4102
4103 static unsigned long
4104 arm_compute_func_type (void)
4105 {
4106 unsigned long type = ARM_FT_UNKNOWN;
4107 tree a;
4108 tree attr;
4109
4110 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4111
4112 /* Decide if the current function is volatile. Such functions
4113 never return, and many memory cycles can be saved by not storing
4114 register values that will never be needed again. This optimization
4115 was added to speed up context switching in a kernel application. */
4116 if (optimize > 0
4117 && (TREE_NOTHROW (current_function_decl)
4118 || !(flag_unwind_tables
4119 || (flag_exceptions
4120 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4121 && TREE_THIS_VOLATILE (current_function_decl))
4122 type |= ARM_FT_VOLATILE;
4123
4124 if (cfun->static_chain_decl != NULL)
4125 type |= ARM_FT_NESTED;
4126
4127 attr = DECL_ATTRIBUTES (current_function_decl);
4128
4129 a = lookup_attribute ("naked", attr);
4130 if (a != NULL_TREE)
4131 type |= ARM_FT_NAKED;
4132
4133 a = lookup_attribute ("isr", attr);
4134 if (a == NULL_TREE)
4135 a = lookup_attribute ("interrupt", attr);
4136
4137 if (a == NULL_TREE)
4138 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4139 else
4140 type |= arm_isr_value (TREE_VALUE (a));
4141
4142 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4143 type |= ARM_FT_CMSE_ENTRY;
4144
4145 return type;
4146 }
4147
4148 /* Returns the type of the current function. */
4149
4150 unsigned long
4151 arm_current_func_type (void)
4152 {
4153 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4154 cfun->machine->func_type = arm_compute_func_type ();
4155
4156 return cfun->machine->func_type;
4157 }
4158
4159 bool
4160 arm_allocate_stack_slots_for_args (void)
4161 {
4162 /* Naked functions should not allocate stack slots for arguments. */
4163 return !IS_NAKED (arm_current_func_type ());
4164 }
4165
4166 static bool
4167 arm_warn_func_return (tree decl)
4168 {
4169 /* Naked functions are implemented entirely in assembly, including the
4170 return sequence, so suppress warnings about this. */
4171 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4172 }
4173
4174 \f
4175 /* Output assembler code for a block containing the constant parts
4176 of a trampoline, leaving space for the variable parts.
4177
4178 On the ARM, (if r8 is the static chain regnum, and remembering that
4179 referencing pc adds an offset of 8) the trampoline looks like:
4180 ldr r8, [pc, #0]
4181 ldr pc, [pc]
4182 .word static chain value
4183 .word function's address
4184 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4185
4186 In FDPIC mode, the trampoline looks like:
4187 .word trampoline address
4188 .word trampoline GOT address
4189 ldr r12, [pc, #8] ; #4 for Arm mode
4190 ldr r9, [pc, #8] ; #4 for Arm mode
4191 ldr pc, [pc, #8] ; #4 for Arm mode
4192 .word static chain value
4193 .word GOT address
4194 .word function's address
4195 */
4196
4197 static void
4198 arm_asm_trampoline_template (FILE *f)
4199 {
4200 fprintf (f, "\t.syntax unified\n");
4201
4202 if (TARGET_FDPIC)
4203 {
4204 /* The first two words are a function descriptor pointing to the
4205 trampoline code just below. */
4206 if (TARGET_ARM)
4207 fprintf (f, "\t.arm\n");
4208 else if (TARGET_THUMB2)
4209 fprintf (f, "\t.thumb\n");
4210 else
4211 /* Only ARM and Thumb-2 are supported. */
4212 gcc_unreachable ();
4213
4214 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4215 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4216 /* Trampoline code which sets the static chain register but also
4217 PIC register before jumping into real code. */
4218 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4219 STATIC_CHAIN_REGNUM, PC_REGNUM,
4220 TARGET_THUMB2 ? 8 : 4);
4221 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4222 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4223 TARGET_THUMB2 ? 8 : 4);
4224 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4225 PC_REGNUM, PC_REGNUM,
4226 TARGET_THUMB2 ? 8 : 4);
4227 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4228 }
4229 else if (TARGET_ARM)
4230 {
4231 fprintf (f, "\t.arm\n");
4232 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4233 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4234 }
4235 else if (TARGET_THUMB2)
4236 {
4237 fprintf (f, "\t.thumb\n");
4238 /* The Thumb-2 trampoline is similar to the arm implementation.
4239 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4240 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4241 STATIC_CHAIN_REGNUM, PC_REGNUM);
4242 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4243 }
4244 else
4245 {
4246 ASM_OUTPUT_ALIGN (f, 2);
4247 fprintf (f, "\t.code\t16\n");
4248 fprintf (f, ".Ltrampoline_start:\n");
4249 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4250 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4251 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4252 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4253 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4254 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4255 }
4256 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4257 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4258 }
4259
4260 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4261
4262 static void
4263 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4264 {
4265 rtx fnaddr, mem, a_tramp;
4266
4267 emit_block_move (m_tramp, assemble_trampoline_template (),
4268 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4269
4270 if (TARGET_FDPIC)
4271 {
4272 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4273 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4274 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4275 /* The function start address is at offset 8, but in Thumb mode
4276 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4277 below. */
4278 rtx trampoline_code_start
4279 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4280
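/* The offsets below follow the FDPIC template emitted above: bytes 0-7
   hold the new function descriptor, 8-19 the three load instructions,
   20 the static chain value, 24 the GOT address and 28 the entry
   point.  */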
4281 /* Write initial funcdesc which points to the trampoline. */
4282 mem = adjust_address (m_tramp, SImode, 0);
4283 emit_move_insn (mem, trampoline_code_start);
4284 mem = adjust_address (m_tramp, SImode, 4);
4285 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4286 /* Setup static chain. */
4287 mem = adjust_address (m_tramp, SImode, 20);
4288 emit_move_insn (mem, chain_value);
4289 /* GOT + real function entry point. */
4290 mem = adjust_address (m_tramp, SImode, 24);
4291 emit_move_insn (mem, gotaddr);
4292 mem = adjust_address (m_tramp, SImode, 28);
4293 emit_move_insn (mem, fnaddr);
4294 }
4295 else
4296 {
4297 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4298 emit_move_insn (mem, chain_value);
4299
4300 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4301 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4302 emit_move_insn (mem, fnaddr);
4303 }
4304
4305 a_tramp = XEXP (m_tramp, 0);
4306 maybe_emit_call_builtin___clear_cache (a_tramp,
4307 plus_constant (ptr_mode,
4308 a_tramp,
4309 TRAMPOLINE_SIZE));
4310 }
4311
4312 /* Thumb trampolines should be entered in thumb mode, so set
4313 the bottom bit of the address. */
4314
4315 static rtx
4316 arm_trampoline_adjust_address (rtx addr)
4317 {
4318 /* For FDPIC don't fix trampoline address since it's a function
4319 descriptor and not a function address. */
4320 if (TARGET_THUMB && !TARGET_FDPIC)
4321 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4322 NULL, 0, OPTAB_LIB_WIDEN);
4323 return addr;
4324 }
4325 \f
4326 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4327 includes call-clobbered registers too. If this is a leaf function
4328 we can just examine the registers used by the RTL, but otherwise we
4329 have to assume that whatever function is called might clobber
4330 anything, and so we have to save all the call-clobbered registers
4331 as well. */
4332 static inline bool reg_needs_saving_p (unsigned reg)
4333 {
4334 unsigned long func_type = arm_current_func_type ();
4335
4336 if (IS_INTERRUPT (func_type))
4337 if (df_regs_ever_live_p (reg)
4338 /* Save call-clobbered core registers. */
4339 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4340 return true;
4341 else
4342 return false;
4343 else
4344 if (!df_regs_ever_live_p (reg)
4345 || call_used_or_fixed_reg_p (reg))
4346 return false;
4347 else
4348 return true;
4349 }
4350
4351 /* Return 1 if it is possible to return using a single instruction.
4352 If SIBLING is non-null, this is a test for a return before a sibling
4353 call. SIBLING is the call insn, so we can examine its register usage. */
4354
4355 int
4356 use_return_insn (int iscond, rtx sibling)
4357 {
4358 int regno;
4359 unsigned int func_type;
4360 unsigned long saved_int_regs;
4361 unsigned HOST_WIDE_INT stack_adjust;
4362 arm_stack_offsets *offsets;
4363
4364 /* Never use a return instruction before reload has run. */
4365 if (!reload_completed)
4366 return 0;
4367
4368 /* Never use a return instruction when return address signing
4369 mechanism is enabled as it requires more than one
4370 instruction. */
4371 if (arm_current_function_pac_enabled_p ())
4372 return 0;
4373
4374 func_type = arm_current_func_type ();
4375
4376 /* Naked, volatile and stack alignment functions need special
4377 consideration. */
4378 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4379 return 0;
4380
4381 /* So do interrupt functions that use the frame pointer and Thumb
4382 interrupt functions. */
4383 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4384 return 0;
4385
4386 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4387 && !optimize_function_for_size_p (cfun))
4388 return 0;
4389
4390 offsets = arm_get_frame_offsets ();
4391 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4392
4393 /* As do variadic functions. */
4394 if (crtl->args.pretend_args_size
4395 || cfun->machine->uses_anonymous_args
4396 /* Or if the function calls __builtin_eh_return () */
4397 || crtl->calls_eh_return
4398 /* Or if the function calls alloca */
4399 || cfun->calls_alloca
4400 /* Or if there is a stack adjustment. However, if the stack pointer
4401 is saved on the stack, we can use a pre-incrementing stack load. */
4402 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4403 && stack_adjust == 4))
4404 /* Or if the static chain register was saved above the frame, under the
4405 assumption that the stack pointer isn't saved on the stack. */
4406 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4407 && arm_compute_static_chain_stack_bytes() != 0))
4408 return 0;
4409
4410 saved_int_regs = offsets->saved_regs_mask;
4411
4412 /* Unfortunately, the insn
4413
4414 ldmib sp, {..., sp, ...}
4415
4416 triggers a bug on most SA-110 based devices, such that the stack
4417 pointer won't be correctly restored if the instruction takes a
4418 page fault. We work around this problem by popping r3 along with
4419 the other registers, since that is never slower than executing
4420 another instruction.
4421
4422 We test for !arm_arch5t here, because code for any architecture
4423 less than this could potentially be run on one of the buggy
4424 chips. */
4425 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4426 {
4427 /* Validate that r3 is a call-clobbered register (always true in
4428 the default abi) ... */
4429 if (!call_used_or_fixed_reg_p (3))
4430 return 0;
4431
4432 /* ... that it isn't being used for a return value ... */
4433 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4434 return 0;
4435
4436 /* ... or for a tail-call argument ... */
4437 if (sibling)
4438 {
4439 gcc_assert (CALL_P (sibling));
4440
4441 if (find_regno_fusage (sibling, USE, 3))
4442 return 0;
4443 }
4444
4445 /* ... and that there are no call-saved registers in r0-r2
4446 (always true in the default ABI). */
4447 if (saved_int_regs & 0x7)
4448 return 0;
4449 }
4450
4451 /* Can't be done if interworking with Thumb, and any registers have been
4452 stacked. */
4453 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4454 return 0;
4455
4456 /* On StrongARM, conditional returns are expensive if they aren't
4457 taken and multiple registers have been stacked. */
4458 if (iscond && arm_tune_strongarm)
4459 {
4460 /* Conditional return when just the LR is stored is a simple
4461 conditional-load instruction, that's not expensive. */
4462 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4463 return 0;
4464
4465 if (flag_pic
4466 && arm_pic_register != INVALID_REGNUM
4467 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4468 return 0;
4469 }
4470
4471 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4472 several instructions if anything needs to be popped. Armv8.1-M Mainline
4473 also needs several instructions to save and restore FP context. */
4474 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4475 return 0;
4476
4477 /* If there are saved registers but the LR isn't saved, then we need
4478 two instructions for the return. */
4479 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4480 return 0;
4481
4482 /* Can't be done if any of the VFP regs are pushed,
4483 since this also requires an insn. */
4484 if (TARGET_VFP_BASE)
4485 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4486 if (reg_needs_saving_p (regno))
4487 return 0;
4488
4489 if (TARGET_REALLY_IWMMXT)
4490 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4491 if (reg_needs_saving_p (regno))
4492 return 0;
4493
4494 return 1;
4495 }
4496
4497 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4498 shrink-wrapping if possible. This is the case if we need to emit a
4499 prologue, which we can test by looking at the offsets. */
4500 bool
4501 use_simple_return_p (void)
4502 {
4503 arm_stack_offsets *offsets;
4504
4505 /* Note this function can be called before or after reload. */
4506 if (!reload_completed)
4507 arm_compute_frame_layout ();
4508
4509 offsets = arm_get_frame_offsets ();
4510 return offsets->outgoing_args != 0;
4511 }
4512
4513 /* Return TRUE if int I is a valid immediate ARM constant. */
4514
4515 int
4516 const_ok_for_arm (HOST_WIDE_INT i)
4517 {
4518 int lowbit;
4519
4520 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4521 be all zero, or all one. */
4522 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4523 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4524 != ((~(unsigned HOST_WIDE_INT) 0)
4525 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4526 return FALSE;
4527
4528 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4529
4530 /* Fast return for 0 and small values. We must do this for zero, since
4531 the code below can't handle that one case. */
4532 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4533 return TRUE;
4534
4535 /* Get the number of trailing zeros. */
4536 lowbit = ffs((int) i) - 1;
4537
4538 /* Only even shifts are allowed in ARM mode so round down to the
4539 nearest even number. */
4540 if (TARGET_ARM)
4541 lowbit &= ~1;
4542
4543 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4544 return TRUE;
4545
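/* Some illustrative examples for the checks below: 0xf000000f is valid in
   ARM mode (0xff rotated right by 4, wrapping around the top of the word);
   0x00ab00ab and 0xab00ab00 are valid only in Thumb-2, as replicated
   16-bit patterns.  */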
4546 if (TARGET_ARM)
4547 {
4548 /* Allow rotated constants in ARM mode. */
4549 if (lowbit <= 4
4550 && ((i & ~0xc000003f) == 0
4551 || (i & ~0xf000000f) == 0
4552 || (i & ~0xfc000003) == 0))
4553 return TRUE;
4554 }
4555 else if (TARGET_THUMB2)
4556 {
4557 HOST_WIDE_INT v;
4558
4559 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4560 v = i & 0xff;
4561 v |= v << 16;
4562 if (i == v || i == (v | (v << 8)))
4563 return TRUE;
4564
4565 /* Allow repeated pattern 0xXY00XY00. */
4566 v = i & 0xff00;
4567 v |= v << 16;
4568 if (i == v)
4569 return TRUE;
4570 }
4571 else if (TARGET_HAVE_MOVT)
4572 {
4573 /* Thumb-1 Targets with MOVT. */
4574 if (i > 0xffff)
4575 return FALSE;
4576 else
4577 return TRUE;
4578 }
4579
4580 return FALSE;
4581 }
4582
4583 /* Return true if I is a valid constant for the operation CODE. */
4584 int
4585 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4586 {
4587 if (const_ok_for_arm (i))
4588 return 1;
4589
4590 switch (code)
4591 {
4592 case SET:
4593 /* See if we can use movw. */
4594 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4595 return 1;
4596 else
4597 /* Otherwise, try mvn. */
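/* e.g. a SET of 0xffffff00 is not itself a valid immediate, but its
   complement 0xff is, so a single MVN suffices.  */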
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4599
4600 case PLUS:
4601 /* See if we can use addw or subw. */
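/* (addw/subw accept a full 12-bit immediate, so any value in the range
   [-4095, 4095] needs no further synthesis.)  */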
4602 if (TARGET_THUMB2
4603 && ((i & 0xfffff000) == 0
4604 || ((-i) & 0xfffff000) == 0))
4605 return 1;
4606 /* Fall through. */
4607 case COMPARE:
4608 case EQ:
4609 case NE:
4610 case GT:
4611 case LE:
4612 case LT:
4613 case GE:
4614 case GEU:
4615 case LTU:
4616 case GTU:
4617 case LEU:
4618 case UNORDERED:
4619 case ORDERED:
4620 case UNEQ:
4621 case UNGE:
4622 case UNLT:
4623 case UNGT:
4624 case UNLE:
4625 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4626
4627 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4628 case XOR:
4629 return 0;
4630
4631 case IOR:
4632 if (TARGET_THUMB2)
4633 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4634 return 0;
4635
4636 case AND:
4637 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4638
4639 default:
4640 gcc_unreachable ();
4641 }
4642 }
4643
4644 /* Return true if I is a valid di mode constant for the operation CODE. */
4645 int
4646 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4647 {
4648 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4649 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4650 rtx hi = GEN_INT (hi_val);
4651 rtx lo = GEN_INT (lo_val);
4652
4653 if (TARGET_THUMB1)
4654 return 0;
4655
4656 switch (code)
4657 {
4658 case AND:
4659 case IOR:
4660 case XOR:
4661 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4662 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4663 case PLUS:
4664 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4665
4666 default:
4667 return 0;
4668 }
4669 }
4670
4671 /* Emit a sequence of insns to handle a large constant.
4672 CODE is the code of the operation required, it can be any of SET, PLUS,
4673 IOR, AND, XOR, MINUS;
4674 MODE is the mode in which the operation is being performed;
4675 VAL is the integer to operate on;
4676 SOURCE is the other operand (a register, or a null-pointer for SET);
4677 SUBTARGETS means it is safe to create scratch registers if that will
4678 either produce a simpler sequence, or we will want to cse the values.
4679 Return value is the number of insns emitted. */
4680
4681 /* ??? Tweak this for thumb2. */
4682 int
4683 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4684 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4685 {
4686 rtx cond;
4687
4688 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4689 cond = COND_EXEC_TEST (PATTERN (insn));
4690 else
4691 cond = NULL_RTX;
4692
4693 if (subtargets || code == SET
4694 || (REG_P (target) && REG_P (source)
4695 && REGNO (target) != REGNO (source)))
4696 {
4697 /* After arm_reorg has been called, we can't fix up expensive
4698 constants by pushing them into memory so we must synthesize
4699 them in-line, regardless of the cost. This is only likely to
4700 be more costly on chips that have load delay slots and we are
4701 compiling without running the scheduler (so no splitting
4702 occurred before the final instruction emission).
4703
4704 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4705 */
4706 if (!cfun->machine->after_arm_reorg
4707 && !cond
4708 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4709 1, 0)
4710 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4711 + (code != SET))))
4712 {
4713 if (code == SET)
4714 {
4715 /* Currently SET is the only monadic value for CODE; all
4716 the rest are dyadic. */
4717 if (TARGET_USE_MOVT)
4718 arm_emit_movpair (target, GEN_INT (val));
4719 else
4720 emit_set_insn (target, GEN_INT (val));
4721
4722 return 1;
4723 }
4724 else
4725 {
4726 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4727
4728 if (TARGET_USE_MOVT)
4729 arm_emit_movpair (temp, GEN_INT (val));
4730 else
4731 emit_set_insn (temp, GEN_INT (val));
4732
4733 /* For MINUS, the constant is the value being subtracted from (the
4734 minuend), since we never have subtraction of a constant. */
4735 if (code == MINUS)
4736 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4737 else
4738 emit_set_insn (target,
4739 gen_rtx_fmt_ee (code, mode, source, temp));
4740 return 2;
4741 }
4742 }
4743 }
4744
4745 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4746 1);
4747 }
4748
4749 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4750 ARM/Thumb-2 immediates and add up to VAL.
4751 The function's return value gives the number of insns required. */
4752 static int
4753 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4754 struct four_ints *return_sequence)
4755 {
4756 int best_consecutive_zeros = 0;
4757 int i;
4758 int best_start = 0;
4759 int insns1, insns2;
4760 struct four_ints tmp_sequence;
4761
4762 /* If we aren't targeting ARM, the best place to start is always at
4763 the bottom, otherwise look more closely. */
4764 if (TARGET_ARM)
4765 {
4766 for (i = 0; i < 32; i += 2)
4767 {
4768 int consecutive_zeros = 0;
4769
4770 if (!(val & (3 << i)))
4771 {
4772 while ((i < 32) && !(val & (3 << i)))
4773 {
4774 consecutive_zeros += 2;
4775 i += 2;
4776 }
4777 if (consecutive_zeros > best_consecutive_zeros)
4778 {
4779 best_consecutive_zeros = consecutive_zeros;
4780 best_start = i - consecutive_zeros;
4781 }
4782 i -= 2;
4783 }
4784 }
4785 }
4786
4787 /* So long as it won't require any more insns to do so, it's
4788 desirable to emit a small constant (in bits 0...9) in the last
4789 insn. This way there is more chance that it can be combined with
4790 a later addressing insn to form a pre-indexed load or store
4791 operation. Consider:
4792
4793 *((volatile int *)0xe0000100) = 1;
4794 *((volatile int *)0xe0000110) = 2;
4795
4796 We want this to wind up as:
4797
4798 mov rA, #0xe0000000
4799 mov rB, #1
4800 str rB, [rA, #0x100]
4801 mov rB, #2
4802 str rB, [rA, #0x110]
4803
4804 rather than having to synthesize both large constants from scratch.
4805
4806 Therefore, we calculate how many insns would be required to emit
4807 the constant starting from `best_start', and also starting from
4808 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4809 yield a shorter sequence, we may as well use zero. */
4810 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4811 if (best_start != 0
4812 && ((HOST_WIDE_INT_1U << best_start) < val))
4813 {
4814 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4815 if (insns2 <= insns1)
4816 {
4817 *return_sequence = tmp_sequence;
4818 insns1 = insns2;
4819 }
4820 }
4821
4822 return insns1;
4823 }
4824
4825 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4826 static int
4827 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4828 struct four_ints *return_sequence, int i)
4829 {
4830 int remainder = val & 0xffffffff;
4831 int insns = 0;
4832
4833 /* Try and find a way of doing the job in either two or three
4834 instructions.
4835
4836 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4837 location. We start at position I. This may be the MSB, or
4838 optimal_immediate_sequence may have positioned it at the largest block
4839 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4840 wrapping around to the top of the word when we drop off the bottom.
4841 In the worst case this code should produce no more than four insns.
4842
4843 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4844 constants, shifted to any arbitrary location. We should always start
4845 at the MSB. */
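/* As a worked illustration (the value here is chosen purely as an
   example and is not taken from the surrounding code): in ARM mode,
   with CODE == PLUS and VAL == 0x00ff00ff, the loop below peels off
   the two rotated 8-bit immediates 0x00ff0000 and 0x000000ff, so the
   whole addition can be emitted roughly as

     add rd, rn, #0x00ff0000
     add rd, rd, #0x000000ff

   with the register names being placeholders.  In the worst case all
   four slots of RETURN_SEQUENCE are used, as the assertion below
   enforces.  */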
4846 do
4847 {
4848 int end;
4849 unsigned int b1, b2, b3, b4;
4850 unsigned HOST_WIDE_INT result;
4851 int loc;
4852
4853 gcc_assert (insns < 4);
4854
4855 if (i <= 0)
4856 i += 32;
4857
4858 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4859 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4860 {
4861 loc = i;
4862 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4863 /* We can use addw/subw for the last 12 bits. */
4864 result = remainder;
4865 else
4866 {
4867 /* Use an 8-bit shifted/rotated immediate. */
4868 end = i - 8;
4869 if (end < 0)
4870 end += 32;
4871 result = remainder & ((0x0ff << end)
4872 | ((i < end) ? (0xff >> (32 - end))
4873 : 0));
4874 i -= 8;
4875 }
4876 }
4877 else
4878 {
4879 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4880 arbitrary shifts. */
4881 i -= TARGET_ARM ? 2 : 1;
4882 continue;
4883 }
4884
4885 /* Next, see if we can do a better job with a thumb2 replicated
4886 constant.
4887
4888 We do it this way around to catch the cases like 0x01F001E0 where
4889 two 8-bit immediates would work, but a replicated constant would
4890 make it worse.
4891
4892 TODO: 16-bit constants that don't clear all the bits, but still win.
4893 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4894 if (TARGET_THUMB2)
4895 {
4896 b1 = (remainder & 0xff000000) >> 24;
4897 b2 = (remainder & 0x00ff0000) >> 16;
4898 b3 = (remainder & 0x0000ff00) >> 8;
4899 b4 = remainder & 0xff;
4900
4901 if (loc > 24)
4902 {
4903 /* The 8-bit immediate already found clears b1 (and maybe b2),
4904 but must leave b3 and b4 alone. */
4905
4906 /* First try to find a 32-bit replicated constant that clears
4907 almost everything. We can assume that we can't do it in one,
4908 or else we wouldn't be here. */
4909 unsigned int tmp = b1 & b2 & b3 & b4;
4910 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4911 + (tmp << 24);
4912 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4913 + (tmp == b3) + (tmp == b4);
4914 if (tmp
4915 && (matching_bytes >= 3
4916 || (matching_bytes == 2
4917 && const_ok_for_op (remainder & ~tmp2, code))))
4918 {
4919 /* At least 3 of the bytes match, and the fourth has at
4920 least as many bits set, or two of the bytes match
4921 and it will only require one more insn to finish. */
4922 result = tmp2;
4923 i = tmp != b1 ? 32
4924 : tmp != b2 ? 24
4925 : tmp != b3 ? 16
4926 : 8;
4927 }
4928
4929 /* Second, try to find a 16-bit replicated constant that can
4930 leave three of the bytes clear. If b2 or b4 is already
4931 zero, then we can. If the 8-bit from above would not
4932 clear b2 anyway, then we still win. */
4933 else if (b1 == b3 && (!b2 || !b4
4934 || (remainder & 0x00ff0000 & ~result)))
4935 {
4936 result = remainder & 0xff00ff00;
4937 i = 24;
4938 }
4939 }
4940 else if (loc > 16)
4941 {
4942 /* The 8-bit immediate already found clears b2 (and maybe b3)
4943 and we don't get here unless b1 is already clear, but it will
4944 leave b4 unchanged. */
4945
4946 /* If we can clear b2 and b4 at once, then we win, since the
4947 8-bits couldn't possibly reach that far. */
4948 if (b2 == b4)
4949 {
4950 result = remainder & 0x00ff00ff;
4951 i = 16;
4952 }
4953 }
4954 }
4955
4956 return_sequence->i[insns++] = result;
4957 remainder &= ~result;
4958
4959 if (code == SET || code == MINUS)
4960 code = PLUS;
4961 }
4962 while (remainder);
4963
4964 return insns;
4965 }
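
/* For reference, the ARM-mode immediate rule relied on above is "an
   8-bit value rotated right by an even amount".  A minimal stand-alone
   check in that spirit (an illustrative sketch only, with a hypothetical
   name; the test actually used throughout this file is const_ok_for_arm)
   could look like:

     static int
     is_rotated_8bit_immediate (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
	 {
	   // Rotate X left by ROT; if the result fits in eight bits,
	   // then X is that 8-bit value rotated right by ROT.
	   unsigned int v = (x << rot) | (rot ? x >> (32 - rot) : 0);
	   if ((v & ~0xffu) == 0)
	     return 1;
	 }
       return 0;
     }

   Under this rule 0xff000000 and 0x000003fc are single immediates,
   while 0x000001ff (nine contiguous bits) is not.  */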
4966
4967 /* Emit an instruction with the indicated PATTERN. If COND is
4968 non-NULL, conditionalize the execution of the instruction on COND
4969 being true. */
4970
4971 static void
4972 emit_constant_insn (rtx cond, rtx pattern)
4973 {
4974 if (cond)
4975 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4976 emit_insn (pattern);
4977 }
4978
4979 /* As above, but extra parameter GENERATE which, if clear, suppresses
4980 RTL generation. */
4981
4982 static int
4983 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4984 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4985 int subtargets, int generate)
4986 {
4987 int can_invert = 0;
4988 int can_negate = 0;
4989 int final_invert = 0;
4990 int i;
4991 int set_sign_bit_copies = 0;
4992 int clear_sign_bit_copies = 0;
4993 int clear_zero_bit_copies = 0;
4994 int set_zero_bit_copies = 0;
4995 int insns = 0, neg_insns, inv_insns;
4996 unsigned HOST_WIDE_INT temp1, temp2;
4997 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4998 struct four_ints *immediates;
4999 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5000
5001 /* Find out which operations are safe for a given CODE. Also do a quick
5002 check for degenerate cases; these can occur when DImode operations
5003 are split. */
5004 switch (code)
5005 {
5006 case SET:
5007 can_invert = 1;
5008 break;
5009
5010 case PLUS:
5011 can_negate = 1;
5012 break;
5013
5014 case IOR:
5015 if (remainder == 0xffffffff)
5016 {
5017 if (generate)
5018 emit_constant_insn (cond,
5019 gen_rtx_SET (target,
5020 GEN_INT (ARM_SIGN_EXTEND (val))));
5021 return 1;
5022 }
5023
5024 if (remainder == 0)
5025 {
5026 if (reload_completed && rtx_equal_p (target, source))
5027 return 0;
5028
5029 if (generate)
5030 emit_constant_insn (cond, gen_rtx_SET (target, source));
5031 return 1;
5032 }
5033 break;
5034
5035 case AND:
5036 if (remainder == 0)
5037 {
5038 if (generate)
5039 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5040 return 1;
5041 }
5042 if (remainder == 0xffffffff)
5043 {
5044 if (reload_completed && rtx_equal_p (target, source))
5045 return 0;
5046 if (generate)
5047 emit_constant_insn (cond, gen_rtx_SET (target, source));
5048 return 1;
5049 }
5050 can_invert = 1;
5051 break;
5052
5053 case XOR:
5054 if (remainder == 0)
5055 {
5056 if (reload_completed && rtx_equal_p (target, source))
5057 return 0;
5058 if (generate)
5059 emit_constant_insn (cond, gen_rtx_SET (target, source));
5060 return 1;
5061 }
5062
5063 if (remainder == 0xffffffff)
5064 {
5065 if (generate)
5066 emit_constant_insn (cond,
5067 gen_rtx_SET (target,
5068 gen_rtx_NOT (mode, source)));
5069 return 1;
5070 }
5071 final_invert = 1;
5072 break;
5073
5074 case MINUS:
5075 /* We treat MINUS as (val - source), since (source - val) is always
5076 passed as (source + (-val)). */
5077 if (remainder == 0)
5078 {
5079 if (generate)
5080 emit_constant_insn (cond,
5081 gen_rtx_SET (target,
5082 gen_rtx_NEG (mode, source)));
5083 return 1;
5084 }
5085 if (const_ok_for_arm (val))
5086 {
5087 if (generate)
5088 emit_constant_insn (cond,
5089 gen_rtx_SET (target,
5090 gen_rtx_MINUS (mode, GEN_INT (val),
5091 source)));
5092 return 1;
5093 }
5094
5095 break;
5096
5097 default:
5098 gcc_unreachable ();
5099 }
5100
5101 /* If we can do it in one insn get out quickly. */
5102 if (const_ok_for_op (val, code))
5103 {
5104 if (generate)
5105 emit_constant_insn (cond,
5106 gen_rtx_SET (target,
5107 (source
5108 ? gen_rtx_fmt_ee (code, mode, source,
5109 GEN_INT (val))
5110 : GEN_INT (val))));
5111 return 1;
5112 }
5113
5114 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5115 insn. */
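/* For example (illustrative values): AND with 0xffff can become a
   single uxth, and on Thumb-2 AND with 0x1ffff a single
   ubfx rd, rn, #0, #17, the register names being placeholders.  */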
5116 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5117 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5118 {
5119 if (generate)
5120 {
5121 if (mode == SImode && i == 16)
5122 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5123 smaller insn. */
5124 emit_constant_insn (cond,
5125 gen_zero_extendhisi2
5126 (target, gen_lowpart (HImode, source)));
5127 else
5128 /* Extz only supports SImode, but we can coerce the operands
5129 into that mode. */
5130 emit_constant_insn (cond,
5131 gen_extzv_t2 (gen_lowpart (SImode, target),
5132 gen_lowpart (SImode, source),
5133 GEN_INT (i), const0_rtx));
5134 }
5135
5136 return 1;
5137 }
5138
5139 /* Calculate a few attributes that may be useful for specific
5140 optimizations. */
5141 /* Count number of leading zeros. */
5142 for (i = 31; i >= 0; i--)
5143 {
5144 if ((remainder & (1 << i)) == 0)
5145 clear_sign_bit_copies++;
5146 else
5147 break;
5148 }
5149
5150 /* Count number of leading 1's. */
5151 for (i = 31; i >= 0; i--)
5152 {
5153 if ((remainder & (1 << i)) != 0)
5154 set_sign_bit_copies++;
5155 else
5156 break;
5157 }
5158
5159 /* Count number of trailing zeros. */
5160 for (i = 0; i <= 31; i++)
5161 {
5162 if ((remainder & (1 << i)) == 0)
5163 clear_zero_bit_copies++;
5164 else
5165 break;
5166 }
5167
5168 /* Count number of trailing 1's. */
5169 for (i = 0; i <= 31; i++)
5170 {
5171 if ((remainder & (1 << i)) != 0)
5172 set_zero_bit_copies++;
5173 else
5174 break;
5175 }
5176
5177 switch (code)
5178 {
5179 case SET:
5180 /* See if we can do this by sign_extending a constant that is known
5181 to be negative. This is a good way of doing it, since the shift
5182 may well merge into a subsequent insn. */
5183 if (set_sign_bit_copies > 1)
5184 {
5185 if (const_ok_for_arm
5186 (temp1 = ARM_SIGN_EXTEND (remainder
5187 << (set_sign_bit_copies - 1))))
5188 {
5189 if (generate)
5190 {
5191 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5192 emit_constant_insn (cond,
5193 gen_rtx_SET (new_src, GEN_INT (temp1)));
5194 emit_constant_insn (cond,
5195 gen_ashrsi3 (target, new_src,
5196 GEN_INT (set_sign_bit_copies - 1)));
5197 }
5198 return 2;
5199 }
5200 /* For an inverted constant, we will need to set the low bits;
5201 these will be shifted out of harm's way. */
5202 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5203 if (const_ok_for_arm (~temp1))
5204 {
5205 if (generate)
5206 {
5207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5208 emit_constant_insn (cond,
5209 gen_rtx_SET (new_src, GEN_INT (temp1)));
5210 emit_constant_insn (cond,
5211 gen_ashrsi3 (target, new_src,
5212 GEN_INT (set_sign_bit_copies - 1)));
5213 }
5214 return 2;
5215 }
5216 }
5217
5218 /* See if we can calculate the value as the difference between two
5219 valid immediates. */
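/* For instance (an illustrative value, ignoring the movw/movt
   alternatives handled elsewhere): 0x0001fffe is not a valid immediate,
   but it can be reached as 0x00020000 - 0x2, both of which are,
   giving roughly

     mov rt, #0x20000
     sub rd, rt, #2

   with the register names being placeholders.  */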
5220 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5221 {
5222 int topshift = clear_sign_bit_copies & ~1;
5223
5224 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5225 & (0xff000000 >> topshift));
5226
5227 /* If temp1 is zero, then that means the 9 most significant
5228 bits of remainder were 1 and we've caused it to overflow.
5229 When topshift is 0 we don't need to do anything since we
5230 can borrow from 'bit 32'. */
5231 if (temp1 == 0 && topshift != 0)
5232 temp1 = 0x80000000 >> (topshift - 1);
5233
5234 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5235
5236 if (const_ok_for_arm (temp2))
5237 {
5238 if (generate)
5239 {
5240 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5241 emit_constant_insn (cond,
5242 gen_rtx_SET (new_src, GEN_INT (temp1)));
5243 emit_constant_insn (cond,
5244 gen_addsi3 (target, new_src,
5245 GEN_INT (-temp2)));
5246 }
5247
5248 return 2;
5249 }
5250 }
5251
5252 /* See if we can generate this by setting the bottom (or the top)
5253 16 bits, and then shifting these into the other half of the
5254 word. We only look for the simplest cases, to do more would cost
5255 too much. Be careful, however, not to generate this when the
5256 alternative would take fewer insns. */
5257 if (val & 0xffff0000)
5258 {
5259 temp1 = remainder & 0xffff0000;
5260 temp2 = remainder & 0x0000ffff;
5261
5262 /* Overlaps outside this range are best done using other methods. */
5263 for (i = 9; i < 24; i++)
5264 {
5265 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5266 && !const_ok_for_arm (temp2))
5267 {
5268 rtx new_src = (subtargets
5269 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5270 : target);
5271 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5272 source, subtargets, generate);
5273 source = new_src;
5274 if (generate)
5275 emit_constant_insn
5276 (cond,
5277 gen_rtx_SET
5278 (target,
5279 gen_rtx_IOR (mode,
5280 gen_rtx_ASHIFT (mode, source,
5281 GEN_INT (i)),
5282 source)));
5283 return insns + 1;
5284 }
5285 }
5286
5287 /* Don't duplicate cases already considered. */
5288 for (i = 17; i < 24; i++)
5289 {
5290 if (((temp1 | (temp1 >> i)) == remainder)
5291 && !const_ok_for_arm (temp1))
5292 {
5293 rtx new_src = (subtargets
5294 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5295 : target);
5296 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5297 source, subtargets, generate);
5298 source = new_src;
5299 if (generate)
5300 emit_constant_insn
5301 (cond,
5302 gen_rtx_SET (target,
5303 gen_rtx_IOR
5304 (mode,
5305 gen_rtx_LSHIFTRT (mode, source,
5306 GEN_INT (i)),
5307 source)));
5308 return insns + 1;
5309 }
5310 }
5311 }
5312 break;
5313
5314 case IOR:
5315 case XOR:
5316 /* If we have IOR or XOR, and the constant can be loaded in a
5317 single instruction, and we can find a temporary to put it in,
5318 then this can be done in two instructions instead of 3-4. */
5319 if (subtargets
5320 /* TARGET can't be NULL if SUBTARGETS is 0. */
5321 || (reload_completed && !reg_mentioned_p (target, source)))
5322 {
5323 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5324 {
5325 if (generate)
5326 {
5327 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5328
5329 emit_constant_insn (cond,
5330 gen_rtx_SET (sub, GEN_INT (val)));
5331 emit_constant_insn (cond,
5332 gen_rtx_SET (target,
5333 gen_rtx_fmt_ee (code, mode,
5334 source, sub)));
5335 }
5336 return 2;
5337 }
5338 }
5339
5340 if (code == XOR)
5341 break;
5342
5343 /* Convert
5344 x = y | constant (which is composed of set_sign_bit_copies leading 1s
5345 followed by 0s, e.g. 0xfff00000) into
5346 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
5347
5348 This can be done in 2 instructions by using shifts with mov or mvn.
5349 e.g. for
5350 x = x | 0xfff00000;
5351 we generate:
5352 mvn r0, r0, asl #12
5353 mvn r0, r0, lsr #12 */
5354 if (set_sign_bit_copies > 8
5355 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5356 {
5357 if (generate)
5358 {
5359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5360 rtx shift = GEN_INT (set_sign_bit_copies);
5361
5362 emit_constant_insn
5363 (cond,
5364 gen_rtx_SET (sub,
5365 gen_rtx_NOT (mode,
5366 gen_rtx_ASHIFT (mode,
5367 source,
5368 shift))));
5369 emit_constant_insn
5370 (cond,
5371 gen_rtx_SET (target,
5372 gen_rtx_NOT (mode,
5373 gen_rtx_LSHIFTRT (mode, sub,
5374 shift))));
5375 }
5376 return 2;
5377 }
5378
5379 /* Convert
5380 x = y | constant (which has set_zero_bit_copies trailing ones)
5381 to
5382 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5383
5384 E.g. for r0 = r0 | 0xfff
5385 we generate:
5386 mvn r0, r0, lsr #12
5387 mvn r0, r0, asl #12 */
5388
5389 if (set_zero_bit_copies > 8
5390 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5391 {
5392 if (generate)
5393 {
5394 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5395 rtx shift = GEN_INT (set_zero_bit_copies);
5396
5397 emit_constant_insn
5398 (cond,
5399 gen_rtx_SET (sub,
5400 gen_rtx_NOT (mode,
5401 gen_rtx_LSHIFTRT (mode,
5402 source,
5403 shift))));
5404 emit_constant_insn
5405 (cond,
5406 gen_rtx_SET (target,
5407 gen_rtx_NOT (mode,
5408 gen_rtx_ASHIFT (mode, sub,
5409 shift))));
5410 }
5411 return 2;
5412 }
5413
5414 /* This will never be reached for Thumb2 because orn is a valid
5415 instruction. This is for Thumb1 and the ARM 32 bit cases.
5416
5417 x = y | constant (such that ~constant is a valid constant)
5418 Transform this to
5419 x = ~(~y & ~constant).
5420 */
5421 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5422 {
5423 if (generate)
5424 {
5425 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5426 emit_constant_insn (cond,
5427 gen_rtx_SET (sub,
5428 gen_rtx_NOT (mode, source)));
5429 source = sub;
5430 if (subtargets)
5431 sub = gen_reg_rtx (mode);
5432 emit_constant_insn (cond,
5433 gen_rtx_SET (sub,
5434 gen_rtx_AND (mode, source,
5435 GEN_INT (temp1))));
5436 emit_constant_insn (cond,
5437 gen_rtx_SET (target,
5438 gen_rtx_NOT (mode, sub)));
5439 }
5440 return 3;
5441 }
5442 break;
5443
5444 case AND:
5445 /* See if two shifts will do 2 or more insns' worth of work. */
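/* As an illustration (example mask only): x & 0x0000fffe is not a
   single AND or BIC, but clearing bit 0 first and then shifting the
   top half away gives roughly

     bic rt, rs, #1
     lsl rt, rt, #16
     lsr rd, rt, #16

   i.e. one recursive arm_gen_constant call plus the two shifts counted
   below, register names being placeholders.  */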
5446 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5447 {
5448 HOST_WIDE_INT shift_mask = ((0xffffffff
5449 << (32 - clear_sign_bit_copies))
5450 & 0xffffffff);
5451
5452 if ((remainder | shift_mask) != 0xffffffff)
5453 {
5454 HOST_WIDE_INT new_val
5455 = ARM_SIGN_EXTEND (remainder | shift_mask);
5456
5457 if (generate)
5458 {
5459 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5460 insns = arm_gen_constant (AND, SImode, cond, new_val,
5461 new_src, source, subtargets, 1);
5462 source = new_src;
5463 }
5464 else
5465 {
5466 rtx targ = subtargets ? NULL_RTX : target;
5467 insns = arm_gen_constant (AND, mode, cond, new_val,
5468 targ, source, subtargets, 0);
5469 }
5470 }
5471
5472 if (generate)
5473 {
5474 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5475 rtx shift = GEN_INT (clear_sign_bit_copies);
5476
5477 emit_insn (gen_ashlsi3 (new_src, source, shift));
5478 emit_insn (gen_lshrsi3 (target, new_src, shift));
5479 }
5480
5481 return insns + 2;
5482 }
5483
5484 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5485 {
5486 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5487
5488 if ((remainder | shift_mask) != 0xffffffff)
5489 {
5490 HOST_WIDE_INT new_val
5491 = ARM_SIGN_EXTEND (remainder | shift_mask);
5492 if (generate)
5493 {
5494 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5495
5496 insns = arm_gen_constant (AND, mode, cond, new_val,
5497 new_src, source, subtargets, 1);
5498 source = new_src;
5499 }
5500 else
5501 {
5502 rtx targ = subtargets ? NULL_RTX : target;
5503
5504 insns = arm_gen_constant (AND, mode, cond, new_val,
5505 targ, source, subtargets, 0);
5506 }
5507 }
5508
5509 if (generate)
5510 {
5511 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5512 rtx shift = GEN_INT (clear_zero_bit_copies);
5513
5514 emit_insn (gen_lshrsi3 (new_src, source, shift));
5515 emit_insn (gen_ashlsi3 (target, new_src, shift));
5516 }
5517
5518 return insns + 2;
5519 }
5520
5521 break;
5522
5523 default:
5524 break;
5525 }
5526
5527 /* Calculate what the instruction sequences would be if we generated it
5528 normally, negated, or inverted. */
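/* For example (illustrative value, ARM mode): PLUS with 0xfffffeff
   needs four immediates directly, but its negation 0x00000101 needs
   only two, so the negated sequence is preferred below and the
   addition is emitted roughly as two subtractions (#0x100 and #1).  */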
5529 if (code == AND)
5530 /* AND cannot be split into multiple insns, so invert and use BIC. */
5531 insns = 99;
5532 else
5533 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5534
5535 if (can_negate)
5536 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5537 &neg_immediates);
5538 else
5539 neg_insns = 99;
5540
5541 if (can_invert || final_invert)
5542 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5543 &inv_immediates);
5544 else
5545 inv_insns = 99;
5546
5547 immediates = &pos_immediates;
5548
5549 /* Is the negated immediate sequence more efficient? */
5550 if (neg_insns < insns && neg_insns <= inv_insns)
5551 {
5552 insns = neg_insns;
5553 immediates = &neg_immediates;
5554 }
5555 else
5556 can_negate = 0;
5557
5558 /* Is the inverted immediate sequence more efficient?
5559 We must allow for an extra NOT instruction for XOR operations, although
5560 there is some chance that the final 'mvn' will get optimized later. */
5561 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5562 {
5563 insns = inv_insns;
5564 immediates = &inv_immediates;
5565 }
5566 else
5567 {
5568 can_invert = 0;
5569 final_invert = 0;
5570 }
5571
5572 /* Now output the chosen sequence as instructions. */
5573 if (generate)
5574 {
5575 for (i = 0; i < insns; i++)
5576 {
5577 rtx new_src, temp1_rtx;
5578
5579 temp1 = immediates->i[i];
5580
5581 if (code == SET || code == MINUS)
5582 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5583 else if ((final_invert || i < (insns - 1)) && subtargets)
5584 new_src = gen_reg_rtx (mode);
5585 else
5586 new_src = target;
5587
5588 if (can_invert)
5589 temp1 = ~temp1;
5590 else if (can_negate)
5591 temp1 = -temp1;
5592
5593 temp1 = trunc_int_for_mode (temp1, mode);
5594 temp1_rtx = GEN_INT (temp1);
5595
5596 if (code == SET)
5597 ;
5598 else if (code == MINUS)
5599 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5600 else
5601 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5602
5603 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5604 source = new_src;
5605
5606 if (code == SET)
5607 {
5608 can_negate = can_invert;
5609 can_invert = 0;
5610 code = PLUS;
5611 }
5612 else if (code == MINUS)
5613 code = PLUS;
5614 }
5615 }
5616
5617 if (final_invert)
5618 {
5619 if (generate)
5620 emit_constant_insn (cond, gen_rtx_SET (target,
5621 gen_rtx_NOT (mode, source)));
5622 insns++;
5623 }
5624
5625 return insns;
5626 }
5627
5628 /* Return TRUE if op is a constant where both the low and top words are
5629 suitable for RSB/RSC instructions. This is never true for Thumb, since
5630 we do not have RSC in that case. */
5631 static bool
5632 arm_const_double_prefer_rsbs_rsc (rtx op)
5633 {
5634 /* Thumb lacks RSC, so we never prefer that sequence. */
5635 if (TARGET_THUMB || !CONST_INT_P (op))
5636 return false;
5637 HOST_WIDE_INT hi, lo;
5638 lo = UINTVAL (op) & 0xffffffffULL;
5639 hi = UINTVAL (op) >> 32;
5640 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5641 }
5642
5643 /* Canonicalize a comparison so that we are more likely to recognize it.
5644 This can be done for a few constant compares, where we can make the
5645 immediate value easier to load. */
5646
5647 static void
5648 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5649 bool op0_preserve_value)
5650 {
5651 machine_mode mode;
5652 unsigned HOST_WIDE_INT i, maxval;
5653
5654 mode = GET_MODE (*op0);
5655 if (mode == VOIDmode)
5656 mode = GET_MODE (*op1);
5657
5658 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5659
5660 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5661 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5662 either reversed or (for constant OP1) adjusted to GE/LT.
5663 Similarly for GTU/LEU in Thumb mode. */
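/* For instance (illustrative), a DImode x > 5 is typically rewritten
   below as x >= 6, and x <= 5 as x < 6, so that the available
   cmp/sbc sequence for GE/LT can be used.  */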
5664 if (mode == DImode)
5665 {
5666
5667 if (*code == GT || *code == LE
5668 || *code == GTU || *code == LEU)
5669 {
5670 /* Missing comparison. First try to use an available
5671 comparison. */
5672 if (CONST_INT_P (*op1))
5673 {
5674 i = INTVAL (*op1);
5675 switch (*code)
5676 {
5677 case GT:
5678 case LE:
5679 if (i != maxval)
5680 {
5681 /* Try to convert to GE/LT, unless that would be more
5682 expensive. */
5683 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5684 && arm_const_double_prefer_rsbs_rsc (*op1))
5685 return;
5686 *op1 = GEN_INT (i + 1);
5687 *code = *code == GT ? GE : LT;
5688 }
5689 else
5690 {
5691 /* GT maxval is always false, LE maxval is always true.
5692 We can't fold that away here as we must make a
5693 comparison, but we can fold them to comparisons
5694 with the same result that can be handled:
5695 op0 GT maxval -> op0 LT minval
5696 op0 LE maxval -> op0 GE minval
5697 where minval = (-maxval - 1). */
5698 *op1 = GEN_INT (-maxval - 1);
5699 *code = *code == GT ? LT : GE;
5700 }
5701 return;
5702
5703 case GTU:
5704 case LEU:
5705 if (i != ~((unsigned HOST_WIDE_INT) 0))
5706 {
5707 /* Try to convert to GEU/LTU, unless that would
5708 be more expensive. */
5709 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5710 && arm_const_double_prefer_rsbs_rsc (*op1))
5711 return;
5712 *op1 = GEN_INT (i + 1);
5713 *code = *code == GTU ? GEU : LTU;
5714 }
5715 else
5716 {
5717 /* GTU ~0 is always false, LEU ~0 is always true.
5718 We can't fold that away here as we must make a
5719 comparison, but we can fold them to comparisons
5720 with the same result that can be handled:
5721 op0 GTU ~0 -> op0 LTU 0
5722 op0 LEU ~0 -> op0 GEU 0. */
5723 *op1 = const0_rtx;
5724 *code = *code == GTU ? LTU : GEU;
5725 }
5726 return;
5727
5728 default:
5729 gcc_unreachable ();
5730 }
5731 }
5732
5733 if (!op0_preserve_value)
5734 {
5735 std::swap (*op0, *op1);
5736 *code = (int)swap_condition ((enum rtx_code)*code);
5737 }
5738 }
5739 return;
5740 }
5741
5742 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5743 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5744 to facilitate possible combining with a cmp into 'ands'. */
5745 if (mode == SImode
5746 && GET_CODE (*op0) == ZERO_EXTEND
5747 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5748 && GET_MODE (XEXP (*op0, 0)) == QImode
5749 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5750 && subreg_lowpart_p (XEXP (*op0, 0))
5751 && *op1 == const0_rtx)
5752 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5753 GEN_INT (255));
5754
5755 /* Comparisons smaller than DImode. Only adjust comparisons against
5756 an out-of-range constant. */
5757 if (!CONST_INT_P (*op1)
5758 || const_ok_for_arm (INTVAL (*op1))
5759 || const_ok_for_arm (- INTVAL (*op1)))
5760 return;
5761
5762 i = INTVAL (*op1);
5763
5764 switch (*code)
5765 {
5766 case EQ:
5767 case NE:
5768 return;
5769
5770 case GT:
5771 case LE:
5772 if (i != maxval
5773 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5774 {
5775 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5776 *code = *code == GT ? GE : LT;
5777 return;
5778 }
5779 break;
5780
5781 case GE:
5782 case LT:
5783 if (i != ~maxval
5784 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5785 {
5786 *op1 = GEN_INT (i - 1);
5787 *code = *code == GE ? GT : LE;
5788 return;
5789 }
5790 break;
5791
5792 case GTU:
5793 case LEU:
5794 if (i != ~((unsigned HOST_WIDE_INT) 0)
5795 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5796 {
5797 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5798 *code = *code == GTU ? GEU : LTU;
5799 return;
5800 }
5801 break;
5802
5803 case GEU:
5804 case LTU:
5805 if (i != 0
5806 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5807 {
5808 *op1 = GEN_INT (i - 1);
5809 *code = *code == GEU ? GTU : LEU;
5810 return;
5811 }
5812 break;
5813
5814 default:
5815 gcc_unreachable ();
5816 }
5817 }
5818
5819
5820 /* Define how to find the value returned by a function. */
5821
5822 static rtx
5823 arm_function_value (const_tree type, const_tree func,
5824 bool outgoing ATTRIBUTE_UNUSED)
5825 {
5826 machine_mode mode;
5827 int unsignedp ATTRIBUTE_UNUSED;
5828 rtx r ATTRIBUTE_UNUSED;
5829
5830 mode = TYPE_MODE (type);
5831
5832 if (TARGET_AAPCS_BASED)
5833 return aapcs_allocate_return_reg (mode, type, func);
5834
5835 /* Promote integer types. */
5836 if (INTEGRAL_TYPE_P (type))
5837 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5838
5839 /* Promote small structs returned in a register to full-word size
5840 for big-endian AAPCS. */
5841 if (arm_return_in_msb (type))
5842 {
5843 HOST_WIDE_INT size = int_size_in_bytes (type);
5844 if (size % UNITS_PER_WORD != 0)
5845 {
5846 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5847 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5848 }
5849 }
5850
5851 return arm_libcall_value_1 (mode);
5852 }
5853
5854 /* libcall hashtable helpers. */
5855
5856 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5857 {
5858 static inline hashval_t hash (const rtx_def *);
5859 static inline bool equal (const rtx_def *, const rtx_def *);
5860 static inline void remove (rtx_def *);
5861 };
5862
5863 inline bool
5864 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5865 {
5866 return rtx_equal_p (p1, p2);
5867 }
5868
5869 inline hashval_t
5870 libcall_hasher::hash (const rtx_def *p1)
5871 {
5872 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5873 }
5874
5875 typedef hash_table<libcall_hasher> libcall_table_type;
5876
5877 static void
5878 add_libcall (libcall_table_type *htab, rtx libcall)
5879 {
5880 *htab->find_slot (libcall, INSERT) = libcall;
5881 }
5882
5883 static bool
5884 arm_libcall_uses_aapcs_base (const_rtx libcall)
5885 {
5886 static bool init_done = false;
5887 static libcall_table_type *libcall_htab = NULL;
5888
5889 if (!init_done)
5890 {
5891 init_done = true;
5892
5893 libcall_htab = new libcall_table_type (31);
5894 add_libcall (libcall_htab,
5895 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5896 add_libcall (libcall_htab,
5897 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5898 add_libcall (libcall_htab,
5899 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5900 add_libcall (libcall_htab,
5901 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5902
5903 add_libcall (libcall_htab,
5904 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5905 add_libcall (libcall_htab,
5906 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5907 add_libcall (libcall_htab,
5908 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5909 add_libcall (libcall_htab,
5910 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5911
5912 add_libcall (libcall_htab,
5913 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5914 add_libcall (libcall_htab,
5915 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5916 add_libcall (libcall_htab,
5917 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5918 add_libcall (libcall_htab,
5919 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5920 add_libcall (libcall_htab,
5921 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5922 add_libcall (libcall_htab,
5923 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5924 add_libcall (libcall_htab,
5925 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5926 add_libcall (libcall_htab,
5927 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5928 add_libcall (libcall_htab,
5929 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5930 add_libcall (libcall_htab,
5931 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5932
5933 /* Values from double-precision helper functions are returned in core
5934 registers if the selected core only supports single-precision
5935 arithmetic, even if we are using the hard-float ABI. The same is
5936 true for single-precision helpers except in the case of MVE, because in
5937 MVE we will be using the hard-float ABI on a CPU which doesn't support
5938 single-precision operations in hardware. In MVE the following check
5939 enables use of emulation for the single-precision arithmetic
5940 operations. */
5941 if (TARGET_HAVE_MVE)
5942 {
5943 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5944 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5945 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5946 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5947 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5948 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5949 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5950 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5951 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5952 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5953 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5954 }
5955 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5956 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5957 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5958 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5959 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5960 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5961 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5962 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5963 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5964 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5965 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5966 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5967 SFmode));
5968 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5969 DFmode));
5970 add_libcall (libcall_htab,
5971 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5972 }
5973
5974 return libcall && libcall_htab->find (libcall) != NULL;
5975 }
5976
5977 static rtx
5978 arm_libcall_value_1 (machine_mode mode)
5979 {
5980 if (TARGET_AAPCS_BASED)
5981 return aapcs_libcall_value (mode);
5982 else if (TARGET_IWMMXT_ABI
5983 && arm_vector_mode_supported_p (mode))
5984 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5985 else
5986 return gen_rtx_REG (mode, ARG_REGISTER (1));
5987 }
5988
5989 /* Define how to find the value returned by a library function
5990 assuming the value has mode MODE. */
5991
5992 static rtx
5993 arm_libcall_value (machine_mode mode, const_rtx libcall)
5994 {
5995 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5996 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5997 {
5998 /* The following libcalls return their result in integer registers,
5999 even though they return a floating point value. */
6000 if (arm_libcall_uses_aapcs_base (libcall))
6001 return gen_rtx_REG (mode, ARG_REGISTER (1));
6002
6003 }
6004
6005 return arm_libcall_value_1 (mode);
6006 }
6007
6008 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6009
6010 static bool
6011 arm_function_value_regno_p (const unsigned int regno)
6012 {
6013 if (regno == ARG_REGISTER (1)
6014 || (TARGET_32BIT
6015 && TARGET_AAPCS_BASED
6016 && TARGET_HARD_FLOAT
6017 && regno == FIRST_VFP_REGNUM)
6018 || (TARGET_IWMMXT_ABI
6019 && regno == FIRST_IWMMXT_REGNUM))
6020 return true;
6021
6022 return false;
6023 }
6024
6025 /* Determine the amount of memory needed to store the possible return
6026 registers of an untyped call. */
6027 int
6028 arm_apply_result_size (void)
6029 {
6030 int size = 16;
6031
6032 if (TARGET_32BIT)
6033 {
6034 if (TARGET_HARD_FLOAT_ABI)
6035 size += 32;
6036 if (TARGET_IWMMXT_ABI)
6037 size += 8;
6038 }
6039
6040 return size;
6041 }
6042
6043 /* Decide whether TYPE should be returned in memory (true)
6044 or in a register (false). FNTYPE is the type of the function making
6045 the call. */
6046 static bool
6047 arm_return_in_memory (const_tree type, const_tree fntype)
6048 {
6049 HOST_WIDE_INT size;
6050
6051 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6052
6053 if (TARGET_AAPCS_BASED)
6054 {
6055 /* Simple, non-aggregate types (i.e. not including vectors and
6056 complex) are always returned in a register (or registers).
6057 We don't care about which register here, so we can short-cut
6058 some of the detail. */
6059 if (!AGGREGATE_TYPE_P (type)
6060 && TREE_CODE (type) != VECTOR_TYPE
6061 && TREE_CODE (type) != COMPLEX_TYPE)
6062 return false;
6063
6064 /* Any return value that is no larger than one word can be
6065 returned in r0. */
6066 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6067 return false;
6068
6069 /* Check any available co-processors to see if they accept the
6070 type as a register candidate (VFP, for example, can return
6071 some aggregates in consecutive registers). These aren't
6072 available if the call is variadic. */
6073 if (aapcs_select_return_coproc (type, fntype) >= 0)
6074 return false;
6075
6076 /* Vector values should be returned using ARM registers, not
6077 memory (unless they're over 16 bytes, which will break since
6078 we only have four call-clobbered registers to play with). */
6079 if (TREE_CODE (type) == VECTOR_TYPE)
6080 return (size < 0 || size > (4 * UNITS_PER_WORD));
6081
6082 /* The rest go in memory. */
6083 return true;
6084 }
6085
6086 if (TREE_CODE (type) == VECTOR_TYPE)
6087 return (size < 0 || size > (4 * UNITS_PER_WORD));
6088
6089 if (!AGGREGATE_TYPE_P (type)
6090 && TREE_CODE (type) != VECTOR_TYPE)
6091 /* All simple types are returned in registers. */
6092 return false;
6093
6094 if (arm_abi != ARM_ABI_APCS)
6095 {
6096 /* ATPCS and later return aggregate types in memory only if they are
6097 larger than a word (or are variable size). */
6098 return (size < 0 || size > UNITS_PER_WORD);
6099 }
6100
6101 /* For the arm-wince targets we choose to be compatible with Microsoft's
6102 ARM and Thumb compilers, which always return aggregates in memory. */
6103 #ifndef ARM_WINCE
6104 /* All structures/unions bigger than one word are returned in memory.
6105 Also catch the case where int_size_in_bytes returns -1. In this case
6106 the aggregate is either huge or of variable size, and in either case
6107 we will want to return it via memory and not in a register. */
6108 if (size < 0 || size > UNITS_PER_WORD)
6109 return true;
6110
6111 if (TREE_CODE (type) == RECORD_TYPE)
6112 {
6113 tree field;
6114
6115 /* For a struct the APCS says that we only return in a register
6116 if the type is 'integer like' and every addressable element
6117 has an offset of zero. For practical purposes this means
6118 that the structure can have at most one non bit-field element
6119 and that this element must be the first one in the structure. */
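/* As an illustration of this rule (example types only, APCS variant):
   struct { unsigned a : 8, b : 8, c : 8, d : 8; } is returned in r0,
   since every member after the first is a bit-field, whereas the
   two-byte struct { char a; char b; } is returned in memory because
   its second member is addressable.  */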
6120
6121 /* Find the first field, ignoring non FIELD_DECL things which will
6122 have been created by C++. */
6123 /* NOTE: This code is deprecated and has not been updated to handle
6124 DECL_FIELD_ABI_IGNORED. */
6125 for (field = TYPE_FIELDS (type);
6126 field && TREE_CODE (field) != FIELD_DECL;
6127 field = DECL_CHAIN (field))
6128 continue;
6129
6130 if (field == NULL)
6131 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6132
6133 /* Check that the first field is valid for returning in a register. */
6134
6135 /* ... Floats are not allowed */
6136 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6137 return true;
6138
6139 /* ... Aggregates that are not themselves valid for returning in
6140 a register are not allowed. */
6141 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6142 return true;
6143
6144 /* Now check the remaining fields, if any. Only bitfields are allowed,
6145 since they are not addressable. */
6146 for (field = DECL_CHAIN (field);
6147 field;
6148 field = DECL_CHAIN (field))
6149 {
6150 if (TREE_CODE (field) != FIELD_DECL)
6151 continue;
6152
6153 if (!DECL_BIT_FIELD_TYPE (field))
6154 return true;
6155 }
6156
6157 return false;
6158 }
6159
6160 if (TREE_CODE (type) == UNION_TYPE)
6161 {
6162 tree field;
6163
6164 /* Unions can be returned in registers if every element is
6165 integral, or can be returned in an integer register. */
6166 for (field = TYPE_FIELDS (type);
6167 field;
6168 field = DECL_CHAIN (field))
6169 {
6170 if (TREE_CODE (field) != FIELD_DECL)
6171 continue;
6172
6173 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6174 return true;
6175
6176 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6177 return true;
6178 }
6179
6180 return false;
6181 }
6182 #endif /* not ARM_WINCE */
6183
6184 /* Return all other types in memory. */
6185 return true;
6186 }
6187
6188 const struct pcs_attribute_arg
6189 {
6190 const char *arg;
6191 enum arm_pcs value;
6192 } pcs_attribute_args[] =
6193 {
6194 {"aapcs", ARM_PCS_AAPCS},
6195 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6196 #if 0
6197 /* We could recognize these, but changes would be needed elsewhere
6198 * to implement them. */
6199 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6200 {"atpcs", ARM_PCS_ATPCS},
6201 {"apcs", ARM_PCS_APCS},
6202 #endif
6203 {NULL, ARM_PCS_UNKNOWN}
6204 };
6205
6206 static enum arm_pcs
6207 arm_pcs_from_attribute (tree attr)
6208 {
6209 const struct pcs_attribute_arg *ptr;
6210 const char *arg;
6211
6212 /* Get the value of the argument. */
6213 if (TREE_VALUE (attr) == NULL_TREE
6214 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6215 return ARM_PCS_UNKNOWN;
6216
6217 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6218
6219 /* Check it against the list of known arguments. */
6220 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6221 if (streq (arg, ptr->arg))
6222 return ptr->value;
6223
6224 /* An unrecognized PCS name. */
6225 return ARM_PCS_UNKNOWN;
6226 }
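
/* For reference, the attribute parsed above appears in declarations
   such as (illustrative):

     double f (double) __attribute__ ((pcs ("aapcs")));

   which requests the base (core-register) calling convention for f
   even when the default is "aapcs-vfp".  */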
6227
6228 /* Get the PCS variant to use for this call. TYPE is the function's type
6229 specification, DECL is the specific declaration. DECL may be null if
6230 the call could be indirect or if this is a library call. */
6231 static enum arm_pcs
6232 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6233 {
6234 bool user_convention = false;
6235 enum arm_pcs user_pcs = arm_pcs_default;
6236 tree attr;
6237
6238 gcc_assert (type);
6239
6240 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6241 if (attr)
6242 {
6243 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6244 user_convention = true;
6245 }
6246
6247 if (TARGET_AAPCS_BASED)
6248 {
6249 /* Detect varargs functions. These always use the base rules
6250 (no argument is ever a candidate for a co-processor
6251 register). */
6252 bool base_rules = stdarg_p (type);
6253
6254 if (user_convention)
6255 {
6256 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6257 sorry ("non-AAPCS derived PCS variant");
6258 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6259 error ("variadic functions must use the base AAPCS variant");
6260 }
6261
6262 if (base_rules)
6263 return ARM_PCS_AAPCS;
6264 else if (user_convention)
6265 return user_pcs;
6266 #if 0
6267 /* Unfortunately, this is not safe and can lead to wrong code
6268 being generated (PR96882). Not all calls into the back-end
6269 pass the DECL, so it is unsafe to make any PCS-changing
6270 decisions based on it. In particular the RETURN_IN_MEMORY
6271 hook is only ever passed a TYPE. This needs revisiting to
6272 see if there are any partial improvements that can be
6273 re-enabled. */
6274 else if (decl && flag_unit_at_a_time)
6275 {
6276 /* Local functions never leak outside this compilation unit,
6277 so we are free to use whatever conventions are
6278 appropriate. */
6279 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6280 cgraph_node *local_info_node
6281 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6282 if (local_info_node && local_info_node->local)
6283 return ARM_PCS_AAPCS_LOCAL;
6284 }
6285 #endif
6286 }
6287 else if (user_convention && user_pcs != arm_pcs_default)
6288 sorry ("PCS variant");
6289
6290 /* For everything else we use the target's default. */
6291 return arm_pcs_default;
6292 }
6293
6294
6295 static void
6296 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6297 const_tree fntype ATTRIBUTE_UNUSED,
6298 rtx libcall ATTRIBUTE_UNUSED,
6299 const_tree fndecl ATTRIBUTE_UNUSED)
6300 {
6301 /* Record the unallocated VFP registers. */
6302 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6303 pcum->aapcs_vfp_reg_alloc = 0;
6304 }
6305
6306 /* Bitmasks that indicate whether earlier versions of GCC would have
6307 taken a different path through the ABI logic. This should result in
6308 a -Wpsabi warning if the earlier path led to a different ABI decision.
6309
6310 WARN_PSABI_EMPTY_CXX17_BASE
6311 Indicates that the type includes an artificial empty C++17 base field
6312 that, prior to GCC 10.1, would prevent the type from being treated as
6313 a HFA or HVA. See PR94711 for details.
6314
6315 WARN_PSABI_NO_UNIQUE_ADDRESS
6316 Indicates that the type includes an empty [[no_unique_address]] field
6317 that, prior to GCC 10.1, would prevent the type from being treated as
6318 a HFA or HVA. */
6319 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6320 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6321 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6322
6323 /* Walk down the type tree of TYPE counting consecutive base elements.
6324 If *MODEP is VOIDmode, then set it to the first valid floating point
6325 type. If a non-floating point type is found, or if a floating point
6326 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6327 otherwise return the count in the sub-tree.
6328
6329 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6330 function has changed its behavior relative to earlier versions of GCC.
6331 Normally the argument should be nonnull and point to a zero-initialized
6332 variable. The function then records whether the ABI decision might
6333 be affected by a known fix to the ABI logic, setting the associated
6334 WARN_PSABI_* bits if so.
6335
6336 When the argument is instead a null pointer, the function tries to
6337 simulate the behavior of GCC before all such ABI fixes were made.
6338 This is useful to check whether the function returns something
6339 different after the ABI fixes. */
6340 static int
6341 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6342 unsigned int *warn_psabi_flags)
6343 {
6344 machine_mode mode;
6345 HOST_WIDE_INT size;
6346
6347 switch (TREE_CODE (type))
6348 {
6349 case REAL_TYPE:
6350 mode = TYPE_MODE (type);
6351 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6352 return -1;
6353
6354 if (*modep == VOIDmode)
6355 *modep = mode;
6356
6357 if (*modep == mode)
6358 return 1;
6359
6360 break;
6361
6362 case COMPLEX_TYPE:
6363 mode = TYPE_MODE (TREE_TYPE (type));
6364 if (mode != DFmode && mode != SFmode)
6365 return -1;
6366
6367 if (*modep == VOIDmode)
6368 *modep = mode;
6369
6370 if (*modep == mode)
6371 return 2;
6372
6373 break;
6374
6375 case VECTOR_TYPE:
6376 /* Use V2SImode and V4SImode as representatives of all 64-bit
6377 and 128-bit vector types, whether or not those modes are
6378 supported with the present options. */
6379 size = int_size_in_bytes (type);
6380 switch (size)
6381 {
6382 case 8:
6383 mode = V2SImode;
6384 break;
6385 case 16:
6386 mode = V4SImode;
6387 break;
6388 default:
6389 return -1;
6390 }
6391
6392 if (*modep == VOIDmode)
6393 *modep = mode;
6394
6395 /* Vector modes are considered to be opaque: two vectors are
6396 equivalent for the purposes of being homogeneous aggregates
6397 if they are the same size. */
6398 if (*modep == mode)
6399 return 1;
6400
6401 break;
6402
6403 case ARRAY_TYPE:
6404 {
6405 int count;
6406 tree index = TYPE_DOMAIN (type);
6407
6408 /* Can't handle incomplete types nor sizes that are not
6409 fixed. */
6410 if (!COMPLETE_TYPE_P (type)
6411 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6412 return -1;
6413
6414 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6415 warn_psabi_flags);
6416 if (count == -1
6417 || !index
6418 || !TYPE_MAX_VALUE (index)
6419 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6420 || !TYPE_MIN_VALUE (index)
6421 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6422 || count < 0)
6423 return -1;
6424
6425 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6426 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6427
6428 /* There must be no padding. */
6429 if (wi::to_wide (TYPE_SIZE (type))
6430 != count * GET_MODE_BITSIZE (*modep))
6431 return -1;
6432
6433 return count;
6434 }
6435
6436 case RECORD_TYPE:
6437 {
6438 int count = 0;
6439 int sub_count;
6440 tree field;
6441
6442 /* Can't handle incomplete types nor sizes that are not
6443 fixed. */
6444 if (!COMPLETE_TYPE_P (type)
6445 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6446 return -1;
6447
6448 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6449 {
6450 if (TREE_CODE (field) != FIELD_DECL)
6451 continue;
6452
6453 if (DECL_FIELD_ABI_IGNORED (field))
6454 {
6455 /* See whether this is something that earlier versions of
6456 GCC failed to ignore. */
6457 unsigned int flag;
6458 if (lookup_attribute ("no_unique_address",
6459 DECL_ATTRIBUTES (field)))
6460 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6461 else if (cxx17_empty_base_field_p (field))
6462 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6463 else
6464 /* No compatibility problem. */
6465 continue;
6466
6467 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6468 if (warn_psabi_flags)
6469 {
6470 *warn_psabi_flags |= flag;
6471 continue;
6472 }
6473 }
6474 /* A zero-width bitfield may affect layout in some
6475 circumstances, but adds no members. The determination
6476 of whether or not a type is an HFA is performed after
6477 layout is complete, so if the type still looks like an
6478 HFA afterwards, it is still classed as one. This is
6479 potentially an ABI break for the hard-float ABI. */
6480 else if (DECL_BIT_FIELD (field)
6481 && integer_zerop (DECL_SIZE (field)))
6482 {
6483 /* Prior to GCC-12 these fields were stripped early,
6484 hiding them from the back-end entirely and
6485 resulting in the correct behaviour for argument
6486 passing. Simulate that old behaviour without
6487 generating a warning. */
6488 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6489 continue;
6490 if (warn_psabi_flags)
6491 {
6492 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6493 continue;
6494 }
6495 }
6496
6497 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6498 warn_psabi_flags);
6499 if (sub_count < 0)
6500 return -1;
6501 count += sub_count;
6502 }
6503
6504 /* There must be no padding. */
6505 if (wi::to_wide (TYPE_SIZE (type))
6506 != count * GET_MODE_BITSIZE (*modep))
6507 return -1;
6508
6509 return count;
6510 }
6511
6512 case UNION_TYPE:
6513 case QUAL_UNION_TYPE:
6514 {
6515 /* These aren't very interesting except in a degenerate case. */
6516 int count = 0;
6517 int sub_count;
6518 tree field;
6519
6520 /* Can't handle incomplete types nor sizes that are not
6521 fixed. */
6522 if (!COMPLETE_TYPE_P (type)
6523 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6524 return -1;
6525
6526 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6527 {
6528 if (TREE_CODE (field) != FIELD_DECL)
6529 continue;
6530
6531 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6532 warn_psabi_flags);
6533 if (sub_count < 0)
6534 return -1;
6535 count = count > sub_count ? count : sub_count;
6536 }
6537
6538 /* There must be no padding. */
6539 if (wi::to_wide (TYPE_SIZE (type))
6540 != count * GET_MODE_BITSIZE (*modep))
6541 return -1;
6542
6543 return count;
6544 }
6545
6546 default:
6547 break;
6548 }
6549
6550 return -1;
6551 }
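
/* For example (illustrative types), the walk above returns 3 with
   *MODEP == SFmode for struct { float x, y, z; }, making it a
   homogeneous-aggregate candidate, while struct { float x; double y; }
   mixes element modes and yields -1.  */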
6552
6553 /* Return true if PCS_VARIANT should use VFP registers. */
6554 static bool
6555 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6556 {
6557 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6558 {
6559 static bool seen_thumb1_vfp = false;
6560
6561 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6562 {
6563 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6564 /* sorry() is not immediately fatal, so only display this once. */
6565 seen_thumb1_vfp = true;
6566 }
6567
6568 return true;
6569 }
6570
6571 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6572 return false;
6573
6574 return (TARGET_32BIT && TARGET_HARD_FLOAT
6575 && (TARGET_VFP_DOUBLE || !is_double));
6576 }
6577
6578 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6579 suitable for passing or returning in VFP registers for the PCS
6580 variant selected. If it is, then *BASE_MODE is updated to contain
6581 a machine mode describing each element of the argument's type and
6582 *COUNT to hold the number of such elements. */
6583 static bool
6584 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6585 machine_mode mode, const_tree type,
6586 machine_mode *base_mode, int *count)
6587 {
6588 machine_mode new_mode = VOIDmode;
6589
6590 /* If we have the type information, prefer that to working things
6591 out from the mode. */
6592 if (type)
6593 {
6594 unsigned int warn_psabi_flags = 0;
6595 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6596 &warn_psabi_flags);
6597 if (ag_count > 0 && ag_count <= 4)
6598 {
6599 static unsigned last_reported_type_uid;
6600 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6601 int alt;
6602 if (warn_psabi
6603 && warn_psabi_flags
6604 && uid != last_reported_type_uid
6605 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6606 != ag_count))
6607 {
6608 const char *url10
6609 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6610 const char *url12
6611 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6612 gcc_assert (alt == -1);
6613 last_reported_type_uid = uid;
6614 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6615 qualification. */
6616 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6617 inform (input_location, "parameter passing for argument of "
6618 "type %qT with %<[[no_unique_address]]%> members "
6619 "changed %{in GCC 10.1%}",
6620 TYPE_MAIN_VARIANT (type), url10);
6621 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6622 inform (input_location, "parameter passing for argument of "
6623 "type %qT when C++17 is enabled changed to match "
6624 "C++14 %{in GCC 10.1%}",
6625 TYPE_MAIN_VARIANT (type), url10);
6626 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6627 inform (input_location, "parameter passing for argument of "
6628 "type %qT changed %{in GCC 12.1%}",
6629 TYPE_MAIN_VARIANT (type), url12);
6630 }
6631 *count = ag_count;
6632 }
6633 else
6634 return false;
6635 }
6636 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6637 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6638 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6639 {
6640 *count = 1;
6641 new_mode = mode;
6642 }
6643 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6644 {
6645 *count = 2;
6646 new_mode = (mode == DCmode ? DFmode : SFmode);
6647 }
6648 else
6649 return false;
6650
6651
6652 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6653 return false;
6654
6655 *base_mode = new_mode;
6656
6657 if (TARGET_GENERAL_REGS_ONLY)
6658 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6659 type);
6660
6661 return true;
6662 }
6663
6664 static bool
6665 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6666 machine_mode mode, const_tree type)
6667 {
6668 int count ATTRIBUTE_UNUSED;
6669 machine_mode ag_mode ATTRIBUTE_UNUSED;
6670
6671 if (!use_vfp_abi (pcs_variant, false))
6672 return false;
6673 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6674 &ag_mode, &count);
6675 }
6676
6677 static bool
6678 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6679 const_tree type)
6680 {
6681 if (!use_vfp_abi (pcum->pcs_variant, false))
6682 return false;
6683
6684 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6685 &pcum->aapcs_vfp_rmode,
6686 &pcum->aapcs_vfp_rcount);
6687 }
6688
6689 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6690 for the behaviour of this function. */
6691
6692 static bool
6693 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6694 const_tree type ATTRIBUTE_UNUSED)
6695 {
6696 int rmode_size
6697 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6698 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6699 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6700 int regno;
6701
6702 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6703 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6704 {
6705 pcum->aapcs_vfp_reg_alloc = mask << regno;
6706 if (mode == BLKmode
6707 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6708 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6709 {
6710 int i;
6711 int rcount = pcum->aapcs_vfp_rcount;
6712 int rshift = shift;
6713 machine_mode rmode = pcum->aapcs_vfp_rmode;
6714 rtx par;
6715 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6716 {
6717 /* Avoid using unsupported vector modes. */
6718 if (rmode == V2SImode)
6719 rmode = DImode;
6720 else if (rmode == V4SImode)
6721 {
6722 rmode = DImode;
6723 rcount *= 2;
6724 rshift /= 2;
6725 }
6726 }
6727 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6728 for (i = 0; i < rcount; i++)
6729 {
6730 rtx tmp = gen_rtx_REG (rmode,
6731 FIRST_VFP_REGNUM + regno + i * rshift);
6732 tmp = gen_rtx_EXPR_LIST
6733 (VOIDmode, tmp,
6734 GEN_INT (i * GET_MODE_SIZE (rmode)));
6735 XVECEXP (par, 0, i) = tmp;
6736 }
6737
6738 pcum->aapcs_reg = par;
6739 }
6740 else
6741 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6742 return true;
6743 }
6744 return false;
6745 }
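
/* As a worked example of the allocation above (illustrative values):
   for a homogeneous aggregate of two doubles, aapcs_vfp_rmode is
   DFmode and aapcs_vfp_rcount is 2, so shift is 2 and mask is 0xf;
   the loop then claims the first four free single-precision slots,
   e.g. s0-s3 (i.e. d0 and d1) when nothing has been allocated yet.  */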
6746
6747 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6748 comment there for the behaviour of this function. */
6749
6750 static rtx
6751 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6752 machine_mode mode,
6753 const_tree type ATTRIBUTE_UNUSED)
6754 {
6755 if (!use_vfp_abi (pcs_variant, false))
6756 return NULL;
6757
6758 if (mode == BLKmode
6759 || (GET_MODE_CLASS (mode) == MODE_INT
6760 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6761 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6762 {
6763 int count;
6764 machine_mode ag_mode;
6765 int i;
6766 rtx par;
6767 int shift;
6768
6769 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6770 &ag_mode, &count);
6771
6772 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6773 {
6774 if (ag_mode == V2SImode)
6775 ag_mode = DImode;
6776 else if (ag_mode == V4SImode)
6777 {
6778 ag_mode = DImode;
6779 count *= 2;
6780 }
6781 }
6782 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6783 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6784 for (i = 0; i < count; i++)
6785 {
6786 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6787 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6788 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6789 XVECEXP (par, 0, i) = tmp;
6790 }
6791
6792 return par;
6793 }
6794
6795 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6796 }
6797
6798 static void
6799 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6800 machine_mode mode ATTRIBUTE_UNUSED,
6801 const_tree type ATTRIBUTE_UNUSED)
6802 {
6803 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6804 pcum->aapcs_vfp_reg_alloc = 0;
6805 return;
6806 }
6807
6808 #define AAPCS_CP(X) \
6809 { \
6810 aapcs_ ## X ## _cum_init, \
6811 aapcs_ ## X ## _is_call_candidate, \
6812 aapcs_ ## X ## _allocate, \
6813 aapcs_ ## X ## _is_return_candidate, \
6814 aapcs_ ## X ## _allocate_return_reg, \
6815 aapcs_ ## X ## _advance \
6816 }
6817
6818 /* Table of co-processors that can be used to pass arguments in
6819 registers.  Ideally no argument should be a candidate for more than
6820 one co-processor table entry, but the table is processed in order
6821 and stops after the first match. If that entry then fails to put
6822 the argument into a co-processor register, the argument will go on
6823 the stack. */
6824 static struct
6825 {
6826 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6827 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6828
6829 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6830 BLKmode) is a candidate for this co-processor's registers; this
6831 function should ignore any position-dependent state in
6832 CUMULATIVE_ARGS and only use call-type dependent information. */
6833 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6834
6835 /* Return true if the argument does get a co-processor register; it
6836 should set aapcs_reg to an RTX of the allocated register, as is
6837 required for a return from FUNCTION_ARG.  */
6838 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6839
6840 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6841 be returned in this co-processor's registers. */
6842 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6843
6844 /* Allocate and return an RTX element to hold the return type of a call. This
6845 routine must not fail and will only be called if is_return_candidate
6846 returned true with the same parameters. */
6847 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6848
6849 /* Finish processing this argument and prepare to start processing
6850 the next one. */
6851 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6852 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6853 {
6854 AAPCS_CP(vfp)
6855 };
6856
6857 #undef AAPCS_CP
6858
6859 static int
6860 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6861 const_tree type)
6862 {
6863 int i;
6864
6865 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6866 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6867 return i;
6868
6869 return -1;
6870 }
6871
6872 static int
6873 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6874 {
6875 /* We aren't passed a decl, so we can't check that a call is local.
6876 However, it isn't clear that that would be a win anyway, since it
6877 might limit some tail-calling opportunities. */
6878 enum arm_pcs pcs_variant;
6879
6880 if (fntype)
6881 {
6882 const_tree fndecl = NULL_TREE;
6883
6884 if (TREE_CODE (fntype) == FUNCTION_DECL)
6885 {
6886 fndecl = fntype;
6887 fntype = TREE_TYPE (fntype);
6888 }
6889
6890 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6891 }
6892 else
6893 pcs_variant = arm_pcs_default;
6894
6895 if (pcs_variant != ARM_PCS_AAPCS)
6896 {
6897 int i;
6898
6899 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6900 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6901 TYPE_MODE (type),
6902 type))
6903 return i;
6904 }
6905 return -1;
6906 }
6907
6908 static rtx
6909 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6910 const_tree fntype)
6911 {
6912 /* We aren't passed a decl, so we can't check that a call is local.
6913 However, it isn't clear that that would be a win anyway, since it
6914 might limit some tail-calling opportunities. */
6915 enum arm_pcs pcs_variant;
6916 int unsignedp ATTRIBUTE_UNUSED;
6917
6918 if (fntype)
6919 {
6920 const_tree fndecl = NULL_TREE;
6921
6922 if (TREE_CODE (fntype) == FUNCTION_DECL)
6923 {
6924 fndecl = fntype;
6925 fntype = TREE_TYPE (fntype);
6926 }
6927
6928 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6929 }
6930 else
6931 pcs_variant = arm_pcs_default;
6932
6933 /* Promote integer types. */
6934 if (type && INTEGRAL_TYPE_P (type))
6935 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6936
6937 if (pcs_variant != ARM_PCS_AAPCS)
6938 {
6939 int i;
6940
6941 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6942 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6943 type))
6944 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6945 mode, type);
6946 }
6947
6948 /* Promotes small structs returned in a register to full-word size
6949 for big-endian AAPCS. */
6950 if (type && arm_return_in_msb (type))
6951 {
6952 HOST_WIDE_INT size = int_size_in_bytes (type);
6953 if (size % UNITS_PER_WORD != 0)
6954 {
6955 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6956 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6957 }
6958 }
6959
6960 return gen_rtx_REG (mode, R0_REGNUM);
6961 }
6962
6963 static rtx
6964 aapcs_libcall_value (machine_mode mode)
6965 {
6966 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6967 && GET_MODE_SIZE (mode) <= 4)
6968 mode = SImode;
6969
6970 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6971 }
6972
6973 /* Lay out a function argument using the AAPCS rules. The rule
6974 numbers referred to here are those in the AAPCS. */
6975 static void
6976 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6977 const_tree type, bool named)
6978 {
6979 int nregs, nregs2;
6980 int ncrn;
6981
6982 /* We only need to do this once per argument. */
6983 if (pcum->aapcs_arg_processed)
6984 return;
6985
6986 pcum->aapcs_arg_processed = true;
6987
6988 /* Special case: if named is false then we are handling an incoming
6989 anonymous argument which is on the stack. */
6990 if (!named)
6991 return;
6992
6993 /* Is this a potential co-processor register candidate? */
6994 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6995 {
6996 int slot = aapcs_select_call_coproc (pcum, mode, type);
6997 pcum->aapcs_cprc_slot = slot;
6998
6999 /* We don't have to apply any of the rules from part B of the
7000 preparation phase, these are handled elsewhere in the
7001 compiler. */
7002
7003 if (slot >= 0)
7004 {
7005 /* A Co-processor register candidate goes either in its own
7006 class of registers or on the stack. */
7007 if (!pcum->aapcs_cprc_failed[slot])
7008 {
7009 /* C1.cp - Try to allocate the argument to co-processor
7010 registers. */
7011 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7012 return;
7013
7014 /* C2.cp - Put the argument on the stack and note that we
7015 can't assign any more candidates in this slot. We also
7016 need to note that we have allocated stack space, so that
7017 we won't later try to split a non-cprc candidate between
7018 core registers and the stack. */
7019 pcum->aapcs_cprc_failed[slot] = true;
7020 pcum->can_split = false;
7021 }
7022
7023 /* We didn't get a register, so this argument goes on the
7024 stack. */
7025 gcc_assert (pcum->can_split == false);
7026 return;
7027 }
7028 }
7029
7030 /* C3 - For double-word aligned arguments, round the NCRN up to the
7031 next even number. */
7032 ncrn = pcum->aapcs_ncrn;
7033 if (ncrn & 1)
7034 {
7035 int res = arm_needs_doubleword_align (mode, type);
7036 /* Only warn during RTL expansion of call stmts, otherwise we would
7037 warn e.g. during gimplification even on functions that will be
7038 always inlined, and we'd warn multiple times. Don't warn when
7039 called in expand_function_start either, as we warn instead in
7040 arm_function_arg_boundary in that case. */
7041 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7042 inform (input_location, "parameter passing for argument of type "
7043 "%qT changed in GCC 7.1", type);
7044 else if (res > 0)
7045 ncrn++;
7046 }
7047
7048 nregs = ARM_NUM_REGS2 (mode, type);
7049
7050 /* Sigh, this test should really assert that nregs > 0, but a GCC
7051 extension allows empty structs and then gives them empty size; it
7052 then allows such a structure to be passed by value. For some of
7053 the code below we have to pretend that such an argument has
7054 non-zero size so that we 'locate' it correctly either in
7055 registers or on the stack. */
7056 gcc_assert (nregs >= 0);
7057
7058 nregs2 = nregs ? nregs : 1;
7059
7060 /* C4 - Argument fits entirely in core registers. */
7061 if (ncrn + nregs2 <= NUM_ARG_REGS)
7062 {
7063 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7064 pcum->aapcs_next_ncrn = ncrn + nregs;
7065 return;
7066 }
7067
7068 /* C5 - Some core registers left and there are no arguments already
7069 on the stack: split this argument between the remaining core
7070 registers and the stack. */
7071 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7072 {
7073 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7074 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7075 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7076 return;
7077 }
7078
7079 /* C6 - NCRN is set to 4. */
7080 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7081
7082 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
7083 return;
7084 }
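/* A rough worked example of rules C3-C5 above, assuming the base AAPCS
   with integer arguments in core registers:

     void f (int a, long long b, int c);

   'a' lands in r0 (C4, NCRN 0 -> 1); 'b' needs doubleword alignment, so
   C3 rounds NCRN up to 2 and C4 assigns r2/r3; 'c' then finds no core
   registers left and is placed on the stack by C6-C8.  */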
7085
7086 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7087 for a call to a function whose data type is FNTYPE.
7088 For a library call, FNTYPE is NULL. */
7089 void
7090 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7091 rtx libname,
7092 tree fndecl ATTRIBUTE_UNUSED)
7093 {
7094 /* Long call handling. */
7095 if (fntype)
7096 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7097 else
7098 pcum->pcs_variant = arm_pcs_default;
7099
7100 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7101 {
7102 if (arm_libcall_uses_aapcs_base (libname))
7103 pcum->pcs_variant = ARM_PCS_AAPCS;
7104
7105 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7106 pcum->aapcs_reg = NULL_RTX;
7107 pcum->aapcs_partial = 0;
7108 pcum->aapcs_arg_processed = false;
7109 pcum->aapcs_cprc_slot = -1;
7110 pcum->can_split = true;
7111
7112 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7113 {
7114 int i;
7115
7116 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7117 {
7118 pcum->aapcs_cprc_failed[i] = false;
7119 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7120 }
7121 }
7122 return;
7123 }
7124
7125 /* Legacy ABIs */
7126
7127 /* On the ARM, the offset starts at 0. */
7128 pcum->nregs = 0;
7129 pcum->iwmmxt_nregs = 0;
7130 pcum->can_split = true;
7131
7132 /* Varargs vectors are treated the same as long long.
7133 named_count avoids having to change the way arm handles 'named'.  */
7134 pcum->named_count = 0;
7135 pcum->nargs = 0;
7136
7137 if (TARGET_REALLY_IWMMXT && fntype)
7138 {
7139 tree fn_arg;
7140
7141 for (fn_arg = TYPE_ARG_TYPES (fntype);
7142 fn_arg;
7143 fn_arg = TREE_CHAIN (fn_arg))
7144 pcum->named_count += 1;
7145
7146 if (! pcum->named_count)
7147 pcum->named_count = INT_MAX;
7148 }
7149 }
7150
7151 /* Return 2 if double word alignment is required for argument passing,
7152 but wasn't required before the fix for PR88469.
7153 Return 1 if double word alignment is required for argument passing.
7154 Return -1 if double word alignment used to be required for argument
7155 passing before PR77728 ABI fix, but is not required anymore.
7156 Return 0 if double word alignment is not required and wasn't required
7157 before either. */
7158 static int
7159 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7160 {
7161 if (!type)
7162 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7163
7164 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7165 if (!AGGREGATE_TYPE_P (type))
7166 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7167
7168 /* Array types: Use member alignment of element type. */
7169 if (TREE_CODE (type) == ARRAY_TYPE)
7170 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7171
7172 int ret = 0;
7173 int ret2 = 0;
7174 /* Record/aggregate types: Use greatest member alignment of any member.
7175
7176 Note that we explicitly consider zero-sized fields here, even though
7177 they don't map to AAPCS machine types. For example, in:
7178
7179 struct __attribute__((aligned(8))) empty {};
7180
7181 struct s {
7182 [[no_unique_address]] empty e;
7183 int x;
7184 };
7185
7186 "s" contains only one Fundamental Data Type (the int field)
7187 but gains 8-byte alignment and size thanks to "e". */
7188 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7189 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7190 {
7191 if (TREE_CODE (field) == FIELD_DECL)
7192 return 1;
7193 else
7194 /* Before PR77728 fix, we were incorrectly considering also
7195 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7196 Make sure we can warn about that with -Wpsabi. */
7197 ret = -1;
7198 }
7199 else if (TREE_CODE (field) == FIELD_DECL
7200 && DECL_BIT_FIELD_TYPE (field)
7201 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7202 ret2 = 1;
7203
7204 if (ret2)
7205 return 2;
7206
7207 return ret;
7208 }
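/* Some illustrative cases for the return values above (a sketch, not an
   exhaustive list):

     struct s1 { long long x; };      returns 1 (a member aligned above
                                      PARM_BOUNDARY)
     struct s2 { int x, y; };         returns 0
     struct s3 { long long x : 8; };  returns 2 (over-aligned bit-field
                                      type; the GCC 9.1 -Wpsabi case)  */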
7209
7210
7211 /* Determine where to put an argument to a function.
7212 Value is zero to push the argument on the stack,
7213 or a hard register in which to store the argument.
7214
7215 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7216 the preceding args and about the function being called.
7217 ARG is a description of the argument.
7218
7219 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7220 other arguments are passed on the stack. If (NAMED == 0) (which happens
7221 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7222 defined), say it is passed in the stack (function_prologue will
7223 indeed make it pass in the stack if necessary). */
7224
7225 static rtx
7226 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7227 {
7228 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7229 int nregs;
7230
7231 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7232 a call insn (op3 of a call_value insn). */
7233 if (arg.end_marker_p ())
7234 return const0_rtx;
7235
7236 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7237 {
7238 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7239 return pcum->aapcs_reg;
7240 }
7241
7242 /* Varargs vectors are treated the same as long long.
7243 named_count avoids having to change the way arm handles 'named'.  */
7244 if (TARGET_IWMMXT_ABI
7245 && arm_vector_mode_supported_p (arg.mode)
7246 && pcum->named_count > pcum->nargs + 1)
7247 {
7248 if (pcum->iwmmxt_nregs <= 9)
7249 return gen_rtx_REG (arg.mode,
7250 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7251 else
7252 {
7253 pcum->can_split = false;
7254 return NULL_RTX;
7255 }
7256 }
7257
7258 /* Put doubleword aligned quantities in even register pairs. */
7259 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7260 {
7261 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7262 if (res < 0 && warn_psabi)
7263 inform (input_location, "parameter passing for argument of type "
7264 "%qT changed in GCC 7.1", arg.type);
7265 else if (res > 0)
7266 {
7267 pcum->nregs++;
7268 if (res > 1 && warn_psabi)
7269 inform (input_location, "parameter passing for argument of type "
7270 "%qT changed in GCC 9.1", arg.type);
7271 }
7272 }
7273
7274 /* Only allow splitting an arg between regs and memory if all preceding
7275 args were allocated to regs. For args passed by reference we only count
7276 the reference pointer. */
7277 if (pcum->can_split)
7278 nregs = 1;
7279 else
7280 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7281
7282 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7283 return NULL_RTX;
7284
7285 return gen_rtx_REG (arg.mode, pcum->nregs);
7286 }
7287
7288 static unsigned int
7289 arm_function_arg_boundary (machine_mode mode, const_tree type)
7290 {
7291 if (!ARM_DOUBLEWORD_ALIGN)
7292 return PARM_BOUNDARY;
7293
7294 int res = arm_needs_doubleword_align (mode, type);
7295 if (res < 0 && warn_psabi)
7296 inform (input_location, "parameter passing for argument of type %qT "
7297 "changed in GCC 7.1", type);
7298 if (res > 1 && warn_psabi)
7299 inform (input_location, "parameter passing for argument of type "
7300 "%qT changed in GCC 9.1", type);
7301
7302 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7303 }
7304
7305 static int
7306 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7307 {
7308 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7309 int nregs = pcum->nregs;
7310
7311 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7312 {
7313 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7314 return pcum->aapcs_partial;
7315 }
7316
7317 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7318 return 0;
7319
7320 if (NUM_ARG_REGS > nregs
7321 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7322 && pcum->can_split)
7323 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7324
7325 return 0;
7326 }
7327
7328 /* Update the data in PCUM to advance over argument ARG. */
7329
7330 static void
7331 arm_function_arg_advance (cumulative_args_t pcum_v,
7332 const function_arg_info &arg)
7333 {
7334 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7335
7336 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7337 {
7338 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7339
7340 if (pcum->aapcs_cprc_slot >= 0)
7341 {
7342 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7343 arg.type);
7344 pcum->aapcs_cprc_slot = -1;
7345 }
7346
7347 /* Generic stuff. */
7348 pcum->aapcs_arg_processed = false;
7349 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7350 pcum->aapcs_reg = NULL_RTX;
7351 pcum->aapcs_partial = 0;
7352 }
7353 else
7354 {
7355 pcum->nargs += 1;
7356 if (arm_vector_mode_supported_p (arg.mode)
7357 && pcum->named_count > pcum->nargs
7358 && TARGET_IWMMXT_ABI)
7359 pcum->iwmmxt_nregs += 1;
7360 else
7361 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7362 }
7363 }
7364
7365 /* Variable sized types are passed by reference. This is a GCC
7366 extension to the ARM ABI. */
7367
7368 static bool
7369 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7370 {
7371 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7372 }
7373 \f
7374 /* Encode the current state of the #pragma [no_]long_calls. */
7375 typedef enum
7376 {
7377 OFF, /* No #pragma [no_]long_calls is in effect. */
7378 LONG, /* #pragma long_calls is in effect. */
7379 SHORT /* #pragma no_long_calls is in effect. */
7380 } arm_pragma_enum;
7381
7382 static arm_pragma_enum arm_pragma_long_calls = OFF;
7383
7384 void
7385 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7386 {
7387 arm_pragma_long_calls = LONG;
7388 }
7389
7390 void
7391 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7392 {
7393 arm_pragma_long_calls = SHORT;
7394 }
7395
7396 void
7397 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7398 {
7399 arm_pragma_long_calls = OFF;
7400 }
7401 \f
7402 /* Handle an attribute requiring a FUNCTION_DECL;
7403 arguments as in struct attribute_spec.handler. */
7404 static tree
7405 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7406 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7407 {
7408 if (TREE_CODE (*node) != FUNCTION_DECL)
7409 {
7410 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7411 name);
7412 *no_add_attrs = true;
7413 }
7414
7415 return NULL_TREE;
7416 }
7417
7418 /* Handle an "interrupt" or "isr" attribute;
7419 arguments as in struct attribute_spec.handler. */
7420 static tree
7421 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7422 bool *no_add_attrs)
7423 {
7424 if (DECL_P (*node))
7425 {
7426 if (TREE_CODE (*node) != FUNCTION_DECL)
7427 {
7428 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7429 name);
7430 *no_add_attrs = true;
7431 }
7432 else if (TARGET_VFP_BASE)
7433 {
7434 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7435 name);
7436 }
7437 /* FIXME: the argument if any is checked for type attributes;
7438 should it be checked for decl ones? */
7439 }
7440 else
7441 {
7442 if (TREE_CODE (*node) == FUNCTION_TYPE
7443 || TREE_CODE (*node) == METHOD_TYPE)
7444 {
7445 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7446 {
7447 warning (OPT_Wattributes, "%qE attribute ignored",
7448 name);
7449 *no_add_attrs = true;
7450 }
7451 }
7452 else if (TREE_CODE (*node) == POINTER_TYPE
7453 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7454 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7455 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7456 {
7457 *node = build_variant_type_copy (*node);
7458 TREE_TYPE (*node) = build_type_attribute_variant
7459 (TREE_TYPE (*node),
7460 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7461 *no_add_attrs = true;
7462 }
7463 else
7464 {
7465 /* Possibly pass this attribute on from the type to a decl. */
7466 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7467 | (int) ATTR_FLAG_FUNCTION_NEXT
7468 | (int) ATTR_FLAG_ARRAY_NEXT))
7469 {
7470 *no_add_attrs = true;
7471 return tree_cons (name, args, NULL_TREE);
7472 }
7473 else
7474 {
7475 warning (OPT_Wattributes, "%qE attribute ignored",
7476 name);
7477 }
7478 }
7479 }
7480
7481 return NULL_TREE;
7482 }
7483
7484 /* Handle a "pcs" attribute; arguments as in struct
7485 attribute_spec.handler. */
7486 static tree
7487 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7488 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7489 {
7490 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7491 {
7492 warning (OPT_Wattributes, "%qE attribute ignored", name);
7493 *no_add_attrs = true;
7494 }
7495 return NULL_TREE;
7496 }
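/* An illustrative use of the "pcs" attribute validated above; only the
   variant strings known to arm_pcs_from_attribute are accepted:

     double dot (double *, double *, int)
       __attribute__ ((pcs ("aapcs-vfp")));
     int sum (int, int) __attribute__ ((pcs ("aapcs")));

   Any other string, e.g. pcs ("apcs"), is ignored with the warning
   above.  */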
7497
7498 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7499 /* Handle the "notshared" attribute. This attribute is another way of
7500 requesting hidden visibility. ARM's compiler supports
7501 "__declspec(notshared)"; we support the same thing via an
7502 attribute. */
7503
7504 static tree
7505 arm_handle_notshared_attribute (tree *node,
7506 tree name ATTRIBUTE_UNUSED,
7507 tree args ATTRIBUTE_UNUSED,
7508 int flags ATTRIBUTE_UNUSED,
7509 bool *no_add_attrs)
7510 {
7511 tree decl = TYPE_NAME (*node);
7512
7513 if (decl)
7514 {
7515 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7516 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7517 *no_add_attrs = false;
7518 }
7519 return NULL_TREE;
7520 }
7521 #endif
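/* An illustrative use of "notshared" on a class type (a sketch; the
   attribute is only meaningful on targets with
   TARGET_DLLIMPORT_DECL_ATTRIBUTES):

     class __attribute__ ((notshared)) local_iface
     {
       virtual void run ();
     };

   The handler above gives the type's name decl hidden visibility, which
   is intended to hide the type's vtable and RTTI.  */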
7522
7523 /* This function returns true if a function with declaration FNDECL and type
7524 FNTYPE uses the stack to pass arguments or return variables and false
7525 otherwise. This is used for functions with the attributes
7526 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7527 diagnostic messages if the stack is used. NAME is the name of the attribute
7528 used. */
7529
7530 static bool
7531 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7532 {
7533 function_args_iterator args_iter;
7534 CUMULATIVE_ARGS args_so_far_v;
7535 cumulative_args_t args_so_far;
7536 bool first_param = true;
7537 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7538
7539 /* Error out if any argument is passed on the stack. */
7540 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7541 args_so_far = pack_cumulative_args (&args_so_far_v);
7542 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7543 {
7544 rtx arg_rtx;
7545
7546 prev_arg_type = arg_type;
7547 if (VOID_TYPE_P (arg_type))
7548 continue;
7549
7550 function_arg_info arg (arg_type, /*named=*/true);
7551 if (!first_param)
7552 /* ??? We should advance after processing the argument and pass
7553 the argument we're advancing past. */
7554 arm_function_arg_advance (args_so_far, arg);
7555 arg_rtx = arm_function_arg (args_so_far, arg);
7556 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7557 {
7558 error ("%qE attribute not available to functions with arguments "
7559 "passed on the stack", name);
7560 return true;
7561 }
7562 first_param = false;
7563 }
7564
7565 /* Error out for variadic functions since we cannot control how many
7566 arguments will be passed and thus the stack could be used.  stdarg_p () is not
7567 used for the checking to avoid browsing arguments twice. */
7568 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7569 {
7570 error ("%qE attribute not available to functions with variable number "
7571 "of arguments", name);
7572 return true;
7573 }
7574
7575 /* Error out if return value is passed on the stack. */
7576 ret_type = TREE_TYPE (fntype);
7577 if (arm_return_in_memory (ret_type, fntype))
7578 {
7579 error ("%qE attribute not available to functions that return value on "
7580 "the stack", name);
7581 return true;
7582 }
7583 return false;
7584 }
7585
7586 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7587 function will check whether the attribute is allowed here and will add the
7588 attribute to the function declaration tree or otherwise issue a warning. */
7589
7590 static tree
7591 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7592 tree /* args */,
7593 int /* flags */,
7594 bool *no_add_attrs)
7595 {
7596 tree fndecl;
7597
7598 if (!use_cmse)
7599 {
7600 *no_add_attrs = true;
7601 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7602 "option", name);
7603 return NULL_TREE;
7604 }
7605
7606 /* Ignore attribute for function types. */
7607 if (TREE_CODE (*node) != FUNCTION_DECL)
7608 {
7609 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7610 name);
7611 *no_add_attrs = true;
7612 return NULL_TREE;
7613 }
7614
7615 fndecl = *node;
7616
7617 /* Warn for static linkage functions. */
7618 if (!TREE_PUBLIC (fndecl))
7619 {
7620 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7621 "with static linkage", name);
7622 *no_add_attrs = true;
7623 return NULL_TREE;
7624 }
7625
7626 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7627 TREE_TYPE (fndecl));
7628 return NULL_TREE;
7629 }
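/* A sketch of an accepted use of the attribute checked above (requires
   -mcmse, external linkage, and arguments/return value that fit in
   registers):

     int __attribute__ ((cmse_nonsecure_entry)) get_counter (void);

   Static functions, or functions whose arguments or return value would
   need the stack, are diagnosed by the checks above instead.  */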
7630
7631
7632 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7633 function will check whether the attribute is allowed here and will add the
7634 attribute to the function type tree or otherwise issue a diagnostic. The
7635 reason we check this at declaration time is to only allow the use of the
7636 attribute with declarations of function pointers and not function
7637 declarations. This function checks NODE is of the expected type and issues
7638 diagnostics otherwise using NAME. If it is not of the expected type
7639 *NO_ADD_ATTRS will be set to true. */
7640
7641 static tree
7642 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7643 tree /* args */,
7644 int /* flags */,
7645 bool *no_add_attrs)
7646 {
7647 tree decl = NULL_TREE;
7648 tree fntype, type;
7649
7650 if (!use_cmse)
7651 {
7652 *no_add_attrs = true;
7653 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7654 "option", name);
7655 return NULL_TREE;
7656 }
7657
7658 if (DECL_P (*node))
7659 {
7660 fntype = TREE_TYPE (*node);
7661
7662 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7663 decl = *node;
7664 }
7665 else
7666 fntype = *node;
7667
7668 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7669 fntype = TREE_TYPE (fntype);
7670
7671 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7672 {
7673 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7674 "function pointer", name);
7675 *no_add_attrs = true;
7676 return NULL_TREE;
7677 }
7678
7679 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7680
7681 if (*no_add_attrs)
7682 return NULL_TREE;
7683
7684 /* Prevent trees being shared among function types with and without
7685 cmse_nonsecure_call attribute. */
7686 if (decl)
7687 {
7688 type = build_distinct_type_copy (TREE_TYPE (decl));
7689 TREE_TYPE (decl) = type;
7690 }
7691 else
7692 {
7693 type = build_distinct_type_copy (*node);
7694 *node = type;
7695 }
7696
7697 fntype = type;
7698
7699 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7700 {
7701 type = fntype;
7702 fntype = TREE_TYPE (fntype);
7703 fntype = build_distinct_type_copy (fntype);
7704 TREE_TYPE (type) = fntype;
7705 }
7706
7707 /* Construct a type attribute and add it to the function type. */
7708 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7709 TYPE_ATTRIBUTES (fntype));
7710 TYPE_ATTRIBUTES (fntype) = attrs;
7711 return NULL_TREE;
7712 }
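/* A sketch of the intended use of "cmse_nonsecure_call": it is attached
   to the base type of a function pointer rather than to a function
   declaration:

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb_t (int);
     ns_cb_t *callback;

   Applying it directly to a function declaration trips the warning
   above.  */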
7713
7714 /* Return 0 if the attributes for two types are incompatible, 1 if they
7715 are compatible, and 2 if they are nearly compatible (which causes a
7716 warning to be generated). */
7717 static int
7718 arm_comp_type_attributes (const_tree type1, const_tree type2)
7719 {
7720 int l1, l2, s1, s2;
7721
7722 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7723 TYPE_ATTRIBUTES (type1));
7724 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7725 TYPE_ATTRIBUTES (type2));
7726 if (bool (attrs1) != bool (attrs2))
7727 return 0;
7728 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7729 return 0;
7730
7731 /* Check for mismatch of non-default calling convention. */
7732 if (TREE_CODE (type1) != FUNCTION_TYPE)
7733 return 1;
7734
7735 /* Check for mismatched call attributes. */
7736 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7737 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7738 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7739 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7740
7741 /* Only bother to check if an attribute is defined. */
7742 if (l1 | l2 | s1 | s2)
7743 {
7744 /* If one type has an attribute, the other must have the same attribute. */
7745 if ((l1 != l2) || (s1 != s2))
7746 return 0;
7747
7748 /* Disallow mixed attributes. */
7749 if ((l1 & s2) || (l2 & s1))
7750 return 0;
7751 }
7752
7753 /* Check for mismatched ISR attribute. */
7754 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7755 if (! l1)
7756 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7757 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7758 if (! l2)
7759 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7760 if (l1 != l2)
7761 return 0;
7762
7763 l1 = lookup_attribute ("cmse_nonsecure_call",
7764 TYPE_ATTRIBUTES (type1)) != NULL;
7765 l2 = lookup_attribute ("cmse_nonsecure_call",
7766 TYPE_ATTRIBUTES (type2)) != NULL;
7767
7768 if (l1 != l2)
7769 return 0;
7770
7771 return 1;
7772 }
7773
7774 /* Assigns default attributes to newly defined type. This is used to
7775 set short_call/long_call attributes for function types of
7776 functions defined inside corresponding #pragma scopes. */
7777 static void
7778 arm_set_default_type_attributes (tree type)
7779 {
7780 /* Add __attribute__ ((long_call)) to all functions, when
7781 inside #pragma long_calls or __attribute__ ((short_call)),
7782 when inside #pragma no_long_calls. */
7783 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7784 {
7785 tree type_attr_list, attr_name;
7786 type_attr_list = TYPE_ATTRIBUTES (type);
7787
7788 if (arm_pragma_long_calls == LONG)
7789 attr_name = get_identifier ("long_call");
7790 else if (arm_pragma_long_calls == SHORT)
7791 attr_name = get_identifier ("short_call");
7792 else
7793 return;
7794
7795 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7796 TYPE_ATTRIBUTES (type) = type_attr_list;
7797 }
7798 }
7799 \f
7800 /* Return true if DECL is known to be linked into section SECTION. */
7801
7802 static bool
7803 arm_function_in_section_p (tree decl, section *section)
7804 {
7805 /* We can only be certain about the prevailing symbol definition. */
7806 if (!decl_binds_to_current_def_p (decl))
7807 return false;
7808
7809 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7810 if (!DECL_SECTION_NAME (decl))
7811 {
7812 /* Make sure that we will not create a unique section for DECL. */
7813 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7814 return false;
7815 }
7816
7817 return function_section (decl) == section;
7818 }
7819
7820 /* Return nonzero if a 32-bit "long_call" should be generated for
7821 a call from the current function to DECL. We generate a long_call
7822 if the function:
7823
7824 a.  has an __attribute__ ((long_call))
7825 or b. is within the scope of a #pragma long_calls
7826 or c. the -mlong-calls command line switch has been specified
7827
7828 However we do not generate a long call if the function:
7829
7830 d. has an __attribute__ ((short_call))
7831 or e. is inside the scope of a #pragma no_long_calls
7832 or f. is defined in the same section as the current function. */
7833
7834 bool
7835 arm_is_long_call_p (tree decl)
7836 {
7837 tree attrs;
7838
7839 if (!decl)
7840 return TARGET_LONG_CALLS;
7841
7842 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7843 if (lookup_attribute ("short_call", attrs))
7844 return false;
7845
7846 /* For "f", be conservative, and only cater for cases in which the
7847 whole of the current function is placed in the same section. */
7848 if (!flag_reorder_blocks_and_partition
7849 && TREE_CODE (decl) == FUNCTION_DECL
7850 && arm_function_in_section_p (decl, current_function_section ()))
7851 return false;
7852
7853 if (lookup_attribute ("long_call", attrs))
7854 return true;
7855
7856 return TARGET_LONG_CALLS;
7857 }
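/* The rules above, in source form (an illustrative sketch):

     void remote (void) __attribute__ ((long_call));    (always long)
     void local (void) __attribute__ ((short_call));    (never long)
     void plain (void);   (follows -mlong-calls and the pragmas, unless
                           the callee is known to live in the same
                           section as the caller)  */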
7858
7859 /* Return nonzero if it is ok to make a tail-call to DECL. */
7860 static bool
7861 arm_function_ok_for_sibcall (tree decl, tree exp)
7862 {
7863 unsigned long func_type;
7864
7865 if (cfun->machine->sibcall_blocked)
7866 return false;
7867
7868 if (TARGET_FDPIC)
7869 {
7870 /* In FDPIC, never tailcall something for which we have no decl:
7871 the target function could be in a different module, requiring
7872 a different FDPIC register value. */
7873 if (decl == NULL)
7874 return false;
7875 }
7876
7877 /* Never tailcall something if we are generating code for Thumb-1. */
7878 if (TARGET_THUMB1)
7879 return false;
7880
7881 /* The PIC register is live on entry to VxWorks PLT entries, so we
7882 must make the call before restoring the PIC register. */
7883 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7884 return false;
7885
7886 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7887 may be used both as the target of the call and as the base register for
7888 restoring the VFP registers.  */
7889 if (TARGET_APCS_FRAME && TARGET_ARM
7890 && TARGET_HARD_FLOAT
7891 && decl && arm_is_long_call_p (decl))
7892 return false;
7893
7894 /* If we are interworking and the function is not declared static
7895 then we can't tail-call it unless we know that it exists in this
7896 compilation unit (since it might be a Thumb routine). */
7897 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7898 && !TREE_ASM_WRITTEN (decl))
7899 return false;
7900
7901 func_type = arm_current_func_type ();
7902 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7903 if (IS_INTERRUPT (func_type))
7904 return false;
7905
7906 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7907 generated for entry functions themselves. */
7908 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7909 return false;
7910
7911 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7912 this would complicate matters for later code generation. */
7913 if (TREE_CODE (exp) == CALL_EXPR)
7914 {
7915 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7916 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7917 return false;
7918 }
7919
7920 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7921 {
7922 /* Check that the return value locations are the same. For
7923 example that we aren't returning a value from the sibling in
7924 a VFP register but then need to transfer it to a core
7925 register. */
7926 rtx a, b;
7927 tree decl_or_type = decl;
7928
7929 /* If it is an indirect function pointer, get the function type. */
7930 if (!decl)
7931 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7932
7933 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7934 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7935 cfun->decl, false);
7936 if (!rtx_equal_p (a, b))
7937 return false;
7938 }
7939
7940 /* Never tailcall if function may be called with a misaligned SP. */
7941 if (IS_STACKALIGN (func_type))
7942 return false;
7943
7944 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7945 references should become a NOP. Don't convert such calls into
7946 sibling calls. */
7947 if (TARGET_AAPCS_BASED
7948 && arm_abi == ARM_ABI_AAPCS
7949 && decl
7950 && DECL_WEAK (decl))
7951 return false;
7952
7953 /* We cannot do a tailcall for an indirect call by descriptor if all the
7954 argument registers are used because the only register left to load the
7955 address is IP and it will already contain the static chain. */
7956 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7957 {
7958 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7959 CUMULATIVE_ARGS cum;
7960 cumulative_args_t cum_v;
7961
7962 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7963 cum_v = pack_cumulative_args (&cum);
7964
7965 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7966 {
7967 tree type = TREE_VALUE (t);
7968 if (!VOID_TYPE_P (type))
7969 {
7970 function_arg_info arg (type, /*named=*/true);
7971 arm_function_arg_advance (cum_v, arg);
7972 }
7973 }
7974
7975 function_arg_info arg (integer_type_node, /*named=*/true);
7976 if (!arm_function_arg (cum_v, arg))
7977 return false;
7978 }
7979
7980 /* Everything else is ok. */
7981 return true;
7982 }
7983
7984 \f
7985 /* Addressing mode support functions. */
7986
7987 /* Return nonzero if X is a legitimate immediate operand when compiling
7988 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7989 int
7990 legitimate_pic_operand_p (rtx x)
7991 {
7992 if (SYMBOL_REF_P (x)
7993 || (GET_CODE (x) == CONST
7994 && GET_CODE (XEXP (x, 0)) == PLUS
7995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7996 return 0;
7997
7998 return 1;
7999 }
8000
8001 /* Record that the current function needs a PIC register. If PIC_REG is null,
8002 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8003 both cases cfun->machine->pic_reg is initialized if we have not already done
8004 so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
8005 the PIC register is reloaded at the current position in the instruction stream
8006 regardless of whether it was loaded before.  Otherwise, it is only loaded
8007 if that has not already been done (crtl->uses_pic_offset_table is null).  Note that
8008 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8009 is only supported iff COMPUTE_NOW is false. */
8010
8011 static void
8012 require_pic_register (rtx pic_reg, bool compute_now)
8013 {
8014 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8015
8016 /* A lot of the logic here is made obscure by the fact that this
8017 routine gets called as part of the rtx cost estimation process.
8018 We don't want those calls to affect any assumptions about the real
8019 function; and further, we can't call entry_of_function() until we
8020 start the real expansion process. */
8021 if (!crtl->uses_pic_offset_table || compute_now)
8022 {
8023 gcc_assert (can_create_pseudo_p ()
8024 || (pic_reg != NULL_RTX
8025 && REG_P (pic_reg)
8026 && GET_MODE (pic_reg) == Pmode));
8027 if (arm_pic_register != INVALID_REGNUM
8028 && !compute_now
8029 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8030 {
8031 if (!cfun->machine->pic_reg)
8032 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8033
8034 /* Play games to avoid marking the function as needing pic
8035 if we are being called as part of the cost-estimation
8036 process. */
8037 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8038 crtl->uses_pic_offset_table = 1;
8039 }
8040 else
8041 {
8042 rtx_insn *seq, *insn;
8043
8044 if (pic_reg == NULL_RTX)
8045 pic_reg = gen_reg_rtx (Pmode);
8046 if (!cfun->machine->pic_reg)
8047 cfun->machine->pic_reg = pic_reg;
8048
8049 /* Play games to avoid marking the function as needing pic
8050 if we are being called as part of the cost-estimation
8051 process. */
8052 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8053 {
8054 crtl->uses_pic_offset_table = 1;
8055 start_sequence ();
8056
8057 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8058 && arm_pic_register > LAST_LO_REGNUM
8059 && !compute_now)
8060 emit_move_insn (cfun->machine->pic_reg,
8061 gen_rtx_REG (Pmode, arm_pic_register));
8062 else
8063 arm_load_pic_register (0UL, pic_reg);
8064
8065 seq = get_insns ();
8066 end_sequence ();
8067
8068 for (insn = seq; insn; insn = NEXT_INSN (insn))
8069 if (INSN_P (insn))
8070 INSN_LOCATION (insn) = prologue_location;
8071
8072 /* We can be called during expansion of PHI nodes, where
8073 we can't yet emit instructions directly in the final
8074 insn stream. Queue the insns on the entry edge, they will
8075 be committed after everything else is expanded. */
8076 if (currently_expanding_to_rtl)
8077 insert_insn_on_edge (seq,
8078 single_succ_edge
8079 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8080 else
8081 emit_insn (seq);
8082 }
8083 }
8084 }
8085 }
8086
8087 /* Generate insns to calculate the address of ORIG in pic mode. */
8088 static rtx_insn *
8089 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8090 {
8091 rtx pat;
8092 rtx mem;
8093
8094 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8095
8096 /* Make the MEM as close to a constant as possible. */
8097 mem = SET_SRC (pat);
8098 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8099 MEM_READONLY_P (mem) = 1;
8100 MEM_NOTRAP_P (mem) = 1;
8101
8102 return emit_insn (pat);
8103 }
8104
8105 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8106 created to hold the result of the load. If not NULL, PIC_REG indicates
8107 which register to use as PIC register, otherwise it is decided by register
8108 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8109 location in the instruction stream, regardless of whether it was loaded
8110 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8111 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8112
8113 Returns the register REG into which the PIC load is performed. */
8114
8115 rtx
8116 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8117 bool compute_now)
8118 {
8119 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8120
8121 if (SYMBOL_REF_P (orig)
8122 || LABEL_REF_P (orig))
8123 {
8124 if (reg == 0)
8125 {
8126 gcc_assert (can_create_pseudo_p ());
8127 reg = gen_reg_rtx (Pmode);
8128 }
8129
8130 /* VxWorks does not impose a fixed gap between segments; the run-time
8131 gap can be different from the object-file gap. We therefore can't
8132 use GOTOFF unless we are absolutely sure that the symbol is in the
8133 same segment as the GOT. Unfortunately, the flexibility of linker
8134 scripts means that we can't be sure of that in general, so assume
8135 that GOTOFF is never valid on VxWorks. */
8136 /* References to weak symbols cannot be resolved locally: they
8137 may be overridden by a non-weak definition at link time. */
8138 rtx_insn *insn;
8139 if ((LABEL_REF_P (orig)
8140 || (SYMBOL_REF_P (orig)
8141 && SYMBOL_REF_LOCAL_P (orig)
8142 && (SYMBOL_REF_DECL (orig)
8143 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8144 && (!SYMBOL_REF_FUNCTION_P (orig)
8145 || arm_fdpic_local_funcdesc_p (orig))))
8146 && NEED_GOT_RELOC
8147 && arm_pic_data_is_text_relative)
8148 insn = arm_pic_static_addr (orig, reg);
8149 else
8150 {
8151 /* If this function doesn't have a pic register, create one now. */
8152 require_pic_register (pic_reg, compute_now);
8153
8154 if (pic_reg == NULL_RTX)
8155 pic_reg = cfun->machine->pic_reg;
8156
8157 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8158 }
8159
8160 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8161 by loop. */
8162 set_unique_reg_note (insn, REG_EQUAL, orig);
8163
8164 return reg;
8165 }
8166 else if (GET_CODE (orig) == CONST)
8167 {
8168 rtx base, offset;
8169
8170 if (GET_CODE (XEXP (orig, 0)) == PLUS
8171 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8172 return orig;
8173
8174 /* Handle the case where we have: const (UNSPEC_TLS). */
8175 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8176 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8177 return orig;
8178
8179 /* Handle the case where we have:
8180 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8181 CONST_INT. */
8182 if (GET_CODE (XEXP (orig, 0)) == PLUS
8183 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8184 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8185 {
8186 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8187 return orig;
8188 }
8189
8190 if (reg == 0)
8191 {
8192 gcc_assert (can_create_pseudo_p ());
8193 reg = gen_reg_rtx (Pmode);
8194 }
8195
8196 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8197
8198 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8199 pic_reg, compute_now);
8200 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8201 base == reg ? 0 : reg, pic_reg,
8202 compute_now);
8203
8204 if (CONST_INT_P (offset))
8205 {
8206 /* The base register doesn't really matter, we only want to
8207 test the index for the appropriate mode. */
8208 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8209 {
8210 gcc_assert (can_create_pseudo_p ());
8211 offset = force_reg (Pmode, offset);
8212 }
8213
8214 if (CONST_INT_P (offset))
8215 return plus_constant (Pmode, base, INTVAL (offset));
8216 }
8217
8218 if (GET_MODE_SIZE (mode) > 4
8219 && (GET_MODE_CLASS (mode) == MODE_INT
8220 || TARGET_SOFT_FLOAT))
8221 {
8222 emit_insn (gen_addsi3 (reg, base, offset));
8223 return reg;
8224 }
8225
8226 return gen_rtx_PLUS (Pmode, base, offset);
8227 }
8228
8229 return orig;
8230 }
8231
8232
8233 /* Generate insns that produce the address of the stack canary.  */
8234 rtx
8235 arm_stack_protect_tls_canary_mem (bool reload)
8236 {
8237 rtx tp = gen_reg_rtx (SImode);
8238 if (reload)
8239 emit_insn (gen_reload_tp_hard (tp));
8240 else
8241 emit_insn (gen_load_tp_hard (tp));
8242
8243 rtx reg = gen_reg_rtx (SImode);
8244 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8245 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8246 return gen_rtx_MEM (SImode, reg);
8247 }
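/* A sketch of the command-line options that select this TLS-based
   canary load (the offset value is target/libc specific and given here
   only as a placeholder):

     -mstack-protector-guard=tls -mstack-protector-guard-offset=<offset>

   With the default -mstack-protector-guard=global, the canary is read
   from __stack_chk_guard and this function is not used.  */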
8248
8249
8250 /* Whether a register is callee saved or not. This is necessary because high
8251 registers are marked as caller saved when optimizing for size on Thumb-1
8252 targets, despite actually being callee saved, in order to avoid using them.  */
8253 #define callee_saved_reg_p(reg) \
8254 (!call_used_or_fixed_reg_p (reg) \
8255 || (TARGET_THUMB1 && optimize_size \
8256 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8257
8258 /* Return a mask for the call-clobbered low registers that are unused
8259 at the end of the prologue. */
8260 static unsigned long
8261 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8262 {
8263 unsigned long mask = 0;
8264 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8265
8266 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8267 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8268 mask |= 1 << (reg - FIRST_LO_REGNUM);
8269 return mask;
8270 }
8271
8272 /* Similarly for the start of the epilogue. */
8273 static unsigned long
8274 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8275 {
8276 unsigned long mask = 0;
8277 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8278
8279 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8280 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8281 mask |= 1 << (reg - FIRST_LO_REGNUM);
8282 return mask;
8283 }
8284
8285 /* Find a spare register to use during the prolog of a function. */
8286
8287 static int
8288 thumb_find_work_register (unsigned long pushed_regs_mask)
8289 {
8290 int reg;
8291
8292 unsigned long unused_regs
8293 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8294
8295 /* Check the argument registers first as these are call-used. The
8296 register allocation order means that sometimes r3 might be used
8297 but earlier argument registers might not, so check them all. */
8298 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8299 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8300 return reg;
8301
8302 /* Otherwise look for a call-saved register that is going to be pushed. */
8303 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8304 if (pushed_regs_mask & (1 << reg))
8305 return reg;
8306
8307 if (TARGET_THUMB2)
8308 {
8309 /* Thumb-2 can use high regs. */
8310 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8311 if (pushed_regs_mask & (1 << reg))
8312 return reg;
8313 }
8314 /* Something went wrong - thumb_compute_save_reg_mask()
8315 should have arranged for a suitable register to be pushed. */
8316 gcc_unreachable ();
8317 }
8318
8319 static GTY(()) int pic_labelno;
8320
8321 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8322 low register. */
8323
8324 void
8325 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8326 {
8327 rtx l1, labelno, pic_tmp, pic_rtx;
8328
8329 if (crtl->uses_pic_offset_table == 0
8330 || TARGET_SINGLE_PIC_BASE
8331 || TARGET_FDPIC)
8332 return;
8333
8334 gcc_assert (flag_pic);
8335
8336 if (pic_reg == NULL_RTX)
8337 pic_reg = cfun->machine->pic_reg;
8338 if (TARGET_VXWORKS_RTP)
8339 {
8340 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8341 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8342 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8343
8344 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8345
8346 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8347 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8348 }
8349 else
8350 {
8351 /* We use an UNSPEC rather than a LABEL_REF because this label
8352 never appears in the code stream. */
8353
8354 labelno = GEN_INT (pic_labelno++);
8355 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8356 l1 = gen_rtx_CONST (VOIDmode, l1);
8357
8358 /* On the ARM the PC register contains 'dot + 8' at the time of the
8359 addition, on the Thumb it is 'dot + 4'. */
8360 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8361 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8362 UNSPEC_GOTSYM_OFF);
8363 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8364
8365 if (TARGET_32BIT)
8366 {
8367 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8368 }
8369 else /* TARGET_THUMB1 */
8370 {
8371 if (arm_pic_register != INVALID_REGNUM
8372 && REGNO (pic_reg) > LAST_LO_REGNUM)
8373 {
8374 /* We will have pushed the pic register, so we should always be
8375 able to find a work register. */
8376 pic_tmp = gen_rtx_REG (SImode,
8377 thumb_find_work_register (saved_regs));
8378 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8379 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8380 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8381 }
8382 else if (arm_pic_register != INVALID_REGNUM
8383 && arm_pic_register > LAST_LO_REGNUM
8384 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8385 {
8386 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8387 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8388 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8389 }
8390 else
8391 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8392 }
8393 }
8394
8395 /* Need to emit this whether or not we obey regdecls,
8396 since setjmp/longjmp can cause life info to screw up. */
8397 emit_use (pic_reg);
8398 }
8399
8400 /* Try to determine whether an object, referenced via ORIG, will be
8401 placed in the text or data segment. This is used in FDPIC mode, to
8402 decide which relocations to use when accessing ORIG. *IS_READONLY
8403 is set to true if ORIG is a read-only location, false otherwise.
8404 Return true if we could determine the location of ORIG, false
8405 otherwise. *IS_READONLY is valid only when we return true. */
8406 static bool
8407 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8408 {
8409 *is_readonly = false;
8410
8411 if (LABEL_REF_P (orig))
8412 {
8413 *is_readonly = true;
8414 return true;
8415 }
8416
8417 if (SYMBOL_REF_P (orig))
8418 {
8419 if (CONSTANT_POOL_ADDRESS_P (orig))
8420 {
8421 *is_readonly = true;
8422 return true;
8423 }
8424 if (SYMBOL_REF_LOCAL_P (orig)
8425 && !SYMBOL_REF_EXTERNAL_P (orig)
8426 && SYMBOL_REF_DECL (orig)
8427 && (!DECL_P (SYMBOL_REF_DECL (orig))
8428 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8429 {
8430 tree decl = SYMBOL_REF_DECL (orig);
8431 tree init = (TREE_CODE (decl) == VAR_DECL)
8432 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8433 ? decl : 0;
8434 int reloc = 0;
8435 bool named_section, readonly;
8436
8437 if (init && init != error_mark_node)
8438 reloc = compute_reloc_for_constant (init);
8439
8440 named_section = TREE_CODE (decl) == VAR_DECL
8441 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8442 readonly = decl_readonly_section (decl, reloc);
8443
8444 /* We don't know where the link script will put a named
8445 section, so return false in such a case. */
8446 if (named_section)
8447 return false;
8448
8449 *is_readonly = readonly;
8450 return true;
8451 }
8452
8453 /* We don't know. */
8454 return false;
8455 }
8456
8457 gcc_unreachable ();
8458 }
8459
8460 /* Generate code to load the address of a static var when flag_pic is set. */
8461 static rtx_insn *
8462 arm_pic_static_addr (rtx orig, rtx reg)
8463 {
8464 rtx l1, labelno, offset_rtx;
8465 rtx_insn *insn;
8466
8467 gcc_assert (flag_pic);
8468
8469 bool is_readonly = false;
8470 bool info_known = false;
8471
8472 if (TARGET_FDPIC
8473 && SYMBOL_REF_P (orig)
8474 && !SYMBOL_REF_FUNCTION_P (orig))
8475 info_known = arm_is_segment_info_known (orig, &is_readonly);
8476
8477 if (TARGET_FDPIC
8478 && SYMBOL_REF_P (orig)
8479 && !SYMBOL_REF_FUNCTION_P (orig)
8480 && !info_known)
8481 {
8482 /* We don't know where orig is stored, so we have to be
8483 pessimistic and use a GOT relocation. */
8484 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8485
8486 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8487 }
8488 else if (TARGET_FDPIC
8489 && SYMBOL_REF_P (orig)
8490 && (SYMBOL_REF_FUNCTION_P (orig)
8491 || !is_readonly))
8492 {
8493 /* We use the GOTOFF relocation. */
8494 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8495
8496 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8497 emit_insn (gen_movsi (reg, l1));
8498 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8499 }
8500 else
8501 {
8502 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8503 PC-relative access. */
8504 /* We use an UNSPEC rather than a LABEL_REF because this label
8505 never appears in the code stream. */
8506 labelno = GEN_INT (pic_labelno++);
8507 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8508 l1 = gen_rtx_CONST (VOIDmode, l1);
8509
8510 /* On the ARM the PC register contains 'dot + 8' at the time of the
8511 addition, on the Thumb it is 'dot + 4'. */
8512 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8513 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8514 UNSPEC_SYMBOL_OFFSET);
8515 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8516
8517 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8518 labelno));
8519 }
8520
8521 return insn;
8522 }
8523
8524 /* Return nonzero if X is valid as an ARM state addressing register. */
8525 static int
8526 arm_address_register_rtx_p (rtx x, int strict_p)
8527 {
8528 int regno;
8529
8530 if (!REG_P (x))
8531 return 0;
8532
8533 regno = REGNO (x);
8534
8535 if (strict_p)
8536 return ARM_REGNO_OK_FOR_BASE_P (regno);
8537
8538 return (regno <= LAST_ARM_REGNUM
8539 || regno >= FIRST_PSEUDO_REGISTER
8540 || regno == FRAME_POINTER_REGNUM
8541 || regno == ARG_POINTER_REGNUM);
8542 }
8543
8544 /* Return TRUE if this rtx is the difference of a symbol and a label,
8545 and will reduce to a PC-relative relocation in the object file.
8546 Expressions like this can be left alone when generating PIC, rather
8547 than forced through the GOT. */
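/* For instance (an illustrative RTX), (minus (symbol_ref "sym") (label_ref L))
   reduces to the assembly-time difference "sym - .L", which the assembler can
   resolve without a GOT entry. */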
8548 static int
8549 pcrel_constant_p (rtx x)
8550 {
8551 if (GET_CODE (x) == MINUS)
8552 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8553
8554 return FALSE;
8555 }
8556
8557 /* Return true if X will surely end up in an index register after next
8558 splitting pass. */
8559 static bool
8560 will_be_in_index_register (const_rtx x)
8561 {
8562 /* arm.md: calculate_pic_address will split this into a register. */
8563 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8564 }
8565
8566 /* Return nonzero if X is a valid ARM state address operand. */
8567 int
8568 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8569 int strict_p)
8570 {
8571 bool use_ldrd;
8572 enum rtx_code code = GET_CODE (x);
8573
8574 if (arm_address_register_rtx_p (x, strict_p))
8575 return 1;
8576
8577 use_ldrd = (TARGET_LDRD
8578 && (mode == DImode || mode == DFmode));
8579
8580 if (code == POST_INC || code == PRE_DEC
8581 || ((code == PRE_INC || code == POST_DEC)
8582 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8583 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8584
8585 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8586 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8587 && GET_CODE (XEXP (x, 1)) == PLUS
8588 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8589 {
8590 rtx addend = XEXP (XEXP (x, 1), 1);
8591
8592 /* Don't allow ldrd post-increment by register because it's hard
8593 to fix up invalid register choices. */
8594 if (use_ldrd
8595 && GET_CODE (x) == POST_MODIFY
8596 && REG_P (addend))
8597 return 0;
8598
8599 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8600 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8601 }
8602
8603 /* After reload constants split into minipools will have addresses
8604 from a LABEL_REF. */
8605 else if (reload_completed
8606 && (code == LABEL_REF
8607 || (code == CONST
8608 && GET_CODE (XEXP (x, 0)) == PLUS
8609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8610 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8611 return 1;
8612
8613 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8614 return 0;
8615
8616 else if (code == PLUS)
8617 {
8618 rtx xop0 = XEXP (x, 0);
8619 rtx xop1 = XEXP (x, 1);
8620
8621 return ((arm_address_register_rtx_p (xop0, strict_p)
8622 && ((CONST_INT_P (xop1)
8623 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8624 || (!strict_p && will_be_in_index_register (xop1))))
8625 || (arm_address_register_rtx_p (xop1, strict_p)
8626 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8627 }
8628
8629 #if 0
8630 /* Reload currently can't handle MINUS, so disable this for now */
8631 else if (GET_CODE (x) == MINUS)
8632 {
8633 rtx xop0 = XEXP (x, 0);
8634 rtx xop1 = XEXP (x, 1);
8635
8636 return (arm_address_register_rtx_p (xop0, strict_p)
8637 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8638 }
8639 #endif
8640
8641 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8642 && code == SYMBOL_REF
8643 && CONSTANT_POOL_ADDRESS_P (x)
8644 && ! (flag_pic
8645 && symbol_mentioned_p (get_pool_constant (x))
8646 && ! pcrel_constant_p (get_pool_constant (x))))
8647 return 1;
8648
8649 return 0;
8650 }
8651
8652 /* Return true if we can avoid creating a constant pool entry for x. */
8653 static bool
8654 can_avoid_literal_pool_for_label_p (rtx x)
8655 {
8656 /* Normally we can assign constant values to target registers without
8657 the help of the constant pool. But there are cases where we have to use
8658 the constant pool, for example:
8659 1) assigning a label to a register.
8660 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8661
8662 A constant pool access of the form:
8663 (set (reg r0) (mem (symbol_ref (".LC0"))))
8664 will cause the use of the literal pool (later, in function arm_reorg).
8665 So here we mark such a form as invalid; the compiler will then
8666 adjust it into:
8667 (set (reg r0) (symbol_ref (".LC0")))
8668 (set (reg r0) (mem (reg r0))).
8669 No extra register is required, and (mem (reg r0)) won't cause the use
8670 of literal pools. */
8671 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8672 && CONSTANT_POOL_ADDRESS_P (x))
8673 return 1;
8674 return 0;
8675 }
8676
8677
8678 /* Return nonzero if X is a valid Thumb-2 address operand. */
8679 static int
8680 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8681 {
8682 bool use_ldrd;
8683 enum rtx_code code = GET_CODE (x);
8684
8685 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8686 we can store and load it like any other 16-bit value. */
8687 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8688 mode = HImode;
8689
8690 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8691 return mve_vector_mem_operand (mode, x, strict_p);
8692
8693 if (arm_address_register_rtx_p (x, strict_p))
8694 return 1;
8695
8696 use_ldrd = (TARGET_LDRD
8697 && (mode == DImode || mode == DFmode));
8698
8699 if (code == POST_INC || code == PRE_DEC
8700 || ((code == PRE_INC || code == POST_DEC)
8701 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8702 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8703
8704 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8705 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8706 && GET_CODE (XEXP (x, 1)) == PLUS
8707 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8708 {
8709 /* Thumb-2 only has autoincrement by constant. */
8710 rtx addend = XEXP (XEXP (x, 1), 1);
8711 HOST_WIDE_INT offset;
8712
8713 if (!CONST_INT_P (addend))
8714 return 0;
8715
8716 offset = INTVAL(addend);
8717 if (GET_MODE_SIZE (mode) <= 4)
8718 return (offset > -256 && offset < 256);
8719
8720 return (use_ldrd && offset > -1024 && offset < 1024
8721 && (offset & 3) == 0);
8722 }
8723
8724 /* After reload constants split into minipools will have addresses
8725 from a LABEL_REF. */
8726 else if (reload_completed
8727 && (code == LABEL_REF
8728 || (code == CONST
8729 && GET_CODE (XEXP (x, 0)) == PLUS
8730 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8731 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8732 return 1;
8733
8734 else if (mode == TImode
8735 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8736 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8737 return 0;
8738
8739 else if (code == PLUS)
8740 {
8741 rtx xop0 = XEXP (x, 0);
8742 rtx xop1 = XEXP (x, 1);
8743
8744 return ((arm_address_register_rtx_p (xop0, strict_p)
8745 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8746 || (!strict_p && will_be_in_index_register (xop1))))
8747 || (arm_address_register_rtx_p (xop1, strict_p)
8748 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8749 }
8750
8751 else if (can_avoid_literal_pool_for_label_p (x))
8752 return 0;
8753
8754 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8755 && code == SYMBOL_REF
8756 && CONSTANT_POOL_ADDRESS_P (x)
8757 && ! (flag_pic
8758 && symbol_mentioned_p (get_pool_constant (x))
8759 && ! pcrel_constant_p (get_pool_constant (x))))
8760 return 1;
8761
8762 return 0;
8763 }
8764
8765 /* Return nonzero if INDEX is valid for an address index operand in
8766 ARM state. */
8767 static int
8768 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8769 int strict_p)
8770 {
8771 HOST_WIDE_INT range;
8772 enum rtx_code code = GET_CODE (index);
8773
8774 /* Standard coprocessor addressing modes. */
8775 if (TARGET_HARD_FLOAT
8776 && (mode == SFmode || mode == DFmode))
8777 return (code == CONST_INT && INTVAL (index) < 1024
8778 && INTVAL (index) > -1024
8779 && (INTVAL (index) & 3) == 0);
8780
8781 /* For quad modes, we restrict the constant offset to be slightly less
8782 than what the instruction format permits. We do this because for
8783 quad mode moves, we will actually decompose them into two separate
8784 double-mode reads or writes. INDEX must therefore be a valid
8785 (double-mode) offset and so should INDEX+8. */
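/* E.g. with the 1016 limit the largest accepted offset is 1012, and the
   second half of a split quad-word access then uses 1012 + 8 = 1020,
   which is still a valid double-mode offset. */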
8786 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8787 return (code == CONST_INT
8788 && INTVAL (index) < 1016
8789 && INTVAL (index) > -1024
8790 && (INTVAL (index) & 3) == 0);
8791
8792 /* We have no such constraint on double mode offsets, so we permit the
8793 full range of the instruction format. */
8794 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8795 return (code == CONST_INT
8796 && INTVAL (index) < 1024
8797 && INTVAL (index) > -1024
8798 && (INTVAL (index) & 3) == 0);
8799
8800 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8801 return (code == CONST_INT
8802 && INTVAL (index) < 1024
8803 && INTVAL (index) > -1024
8804 && (INTVAL (index) & 3) == 0);
8805
8806 if (arm_address_register_rtx_p (index, strict_p)
8807 && (GET_MODE_SIZE (mode) <= 4))
8808 return 1;
8809
8810 if (mode == DImode || mode == DFmode)
8811 {
8812 if (code == CONST_INT)
8813 {
8814 HOST_WIDE_INT val = INTVAL (index);
8815
8816 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8817 If vldr is selected it uses arm_coproc_mem_operand. */
8818 if (TARGET_LDRD)
8819 return val > -256 && val < 256;
8820 else
8821 return val > -4096 && val < 4092;
8822 }
8823
8824 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8825 }
8826
8827 if (GET_MODE_SIZE (mode) <= 4
8828 && ! (arm_arch4
8829 && (mode == HImode
8830 || mode == HFmode
8831 || (mode == QImode && outer == SIGN_EXTEND))))
8832 {
8833 if (code == MULT)
8834 {
8835 rtx xiop0 = XEXP (index, 0);
8836 rtx xiop1 = XEXP (index, 1);
8837
8838 return ((arm_address_register_rtx_p (xiop0, strict_p)
8839 && power_of_two_operand (xiop1, SImode))
8840 || (arm_address_register_rtx_p (xiop1, strict_p)
8841 && power_of_two_operand (xiop0, SImode)));
8842 }
8843 else if (code == LSHIFTRT || code == ASHIFTRT
8844 || code == ASHIFT || code == ROTATERT)
8845 {
8846 rtx op = XEXP (index, 1);
8847
8848 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8849 && CONST_INT_P (op)
8850 && INTVAL (op) > 0
8851 && INTVAL (op) <= 31);
8852 }
8853 }
8854
8855 /* For ARM v4 we may be doing a sign-extend operation during the
8856 load. */
8857 if (arm_arch4)
8858 {
8859 if (mode == HImode
8860 || mode == HFmode
8861 || (outer == SIGN_EXTEND && mode == QImode))
8862 range = 256;
8863 else
8864 range = 4096;
8865 }
8866 else
8867 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8868
8869 return (code == CONST_INT
8870 && INTVAL (index) < range
8871 && INTVAL (index) > -range);
8872 }
8873
8874 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8875 index operand, i.e. 1, 2, 4 or 8. */
8876 static bool
8877 thumb2_index_mul_operand (rtx op)
8878 {
8879 HOST_WIDE_INT val;
8880
8881 if (!CONST_INT_P (op))
8882 return false;
8883
8884 val = INTVAL(op);
8885 return (val == 1 || val == 2 || val == 4 || val == 8);
8886 }
8887
8888 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8889 static int
8890 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8891 {
8892 enum rtx_code code = GET_CODE (index);
8893
8894 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8895 /* Standard coprocessor addressing modes. */
8896 if (TARGET_VFP_BASE
8897 && (mode == SFmode || mode == DFmode))
8898 return (code == CONST_INT && INTVAL (index) < 1024
8899 /* Thumb-2 allows only a > -256 index range for its core register
8900 load/stores. Since we allow SF/DF in core registers, we have
8901 to use the intersection between -256~4096 (core) and -1024~1024
8902 (coprocessor). */
8903 && INTVAL (index) > -256
8904 && (INTVAL (index) & 3) == 0);
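      /* Restating the checks above: word-aligned offsets in [-252, 1020]. */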
8905
8906 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8907 {
8908 /* For DImode assume values will usually live in core regs
8909 and only allow LDRD addressing modes. */
8910 if (!TARGET_LDRD || mode != DImode)
8911 return (code == CONST_INT
8912 && INTVAL (index) < 1024
8913 && INTVAL (index) > -1024
8914 && (INTVAL (index) & 3) == 0);
8915 }
8916
8917 /* For quad modes, we restrict the constant offset to be slightly less
8918 than what the instruction format permits. We do this because for
8919 quad mode moves, we will actually decompose them into two separate
8920 double-mode reads or writes. INDEX must therefore be a valid
8921 (double-mode) offset and so should INDEX+8. */
8922 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8923 return (code == CONST_INT
8924 && INTVAL (index) < 1016
8925 && INTVAL (index) > -1024
8926 && (INTVAL (index) & 3) == 0);
8927
8928 /* We have no such constraint on double mode offsets, so we permit the
8929 full range of the instruction format. */
8930 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8931 return (code == CONST_INT
8932 && INTVAL (index) < 1024
8933 && INTVAL (index) > -1024
8934 && (INTVAL (index) & 3) == 0);
8935
8936 if (arm_address_register_rtx_p (index, strict_p)
8937 && (GET_MODE_SIZE (mode) <= 4))
8938 return 1;
8939
8940 if (mode == DImode || mode == DFmode)
8941 {
8942 if (code == CONST_INT)
8943 {
8944 HOST_WIDE_INT val = INTVAL (index);
8945 /* Thumb-2 ldrd only has reg+const addressing modes.
8946 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8947 If vldr is selected it uses arm_coproc_mem_operand. */
8948 if (TARGET_LDRD)
8949 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8950 else
8951 return IN_RANGE (val, -255, 4095 - 4);
8952 }
8953 else
8954 return 0;
8955 }
8956
8957 if (code == MULT)
8958 {
8959 rtx xiop0 = XEXP (index, 0);
8960 rtx xiop1 = XEXP (index, 1);
8961
8962 return ((arm_address_register_rtx_p (xiop0, strict_p)
8963 && thumb2_index_mul_operand (xiop1))
8964 || (arm_address_register_rtx_p (xiop1, strict_p)
8965 && thumb2_index_mul_operand (xiop0)));
8966 }
8967 else if (code == ASHIFT)
8968 {
8969 rtx op = XEXP (index, 1);
8970
8971 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8972 && CONST_INT_P (op)
8973 && INTVAL (op) > 0
8974 && INTVAL (op) <= 3);
8975 }
8976
8977 return (code == CONST_INT
8978 && INTVAL (index) < 4096
8979 && INTVAL (index) > -256);
8980 }
8981
8982 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8983 static int
8984 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8985 {
8986 int regno;
8987
8988 if (!REG_P (x))
8989 return 0;
8990
8991 regno = REGNO (x);
8992
8993 if (strict_p)
8994 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8995
8996 return (regno <= LAST_LO_REGNUM
8997 || regno > LAST_VIRTUAL_REGISTER
8998 || regno == FRAME_POINTER_REGNUM
8999 || (GET_MODE_SIZE (mode) >= 4
9000 && (regno == STACK_POINTER_REGNUM
9001 || regno >= FIRST_PSEUDO_REGISTER
9002 || x == hard_frame_pointer_rtx
9003 || x == arg_pointer_rtx)));
9004 }
9005
9006 /* Return nonzero if x is a legitimate index register. This is the case
9007 for any base register that can access a QImode object. */
9008 inline static int
9009 thumb1_index_register_rtx_p (rtx x, int strict_p)
9010 {
9011 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9012 }
9013
9014 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9015
9016 The AP may be eliminated to either the SP or the FP, so we use the
9017 least common denominator, e.g. SImode, and offsets from 0 to 64.
9018
9019 ??? Verify whether the above is the right approach.
9020
9021 ??? Also, the FP may be eliminated to the SP, so perhaps that
9022 needs special handling also.
9023
9024 ??? Look at how the mips16 port solves this problem. It probably uses
9025 better ways to solve some of these problems.
9026
9027 Although it is not incorrect, we don't accept QImode and HImode
9028 addresses based on the frame pointer or arg pointer until the
9029 reload pass starts. This is so that eliminating such addresses
9030 into stack based ones won't produce impossible code. */
9031 int
9032 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9033 {
9034 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9035 return 0;
9036
9037 /* ??? Not clear if this is right. Experiment. */
9038 if (GET_MODE_SIZE (mode) < 4
9039 && !(reload_in_progress || reload_completed)
9040 && (reg_mentioned_p (frame_pointer_rtx, x)
9041 || reg_mentioned_p (arg_pointer_rtx, x)
9042 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9043 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9044 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9045 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9046 return 0;
9047
9048 /* Accept any base register. SP only in SImode or larger. */
9049 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9050 return 1;
9051
9052 /* This is PC relative data before arm_reorg runs. */
9053 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9054 && SYMBOL_REF_P (x)
9055 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9056 && !arm_disable_literal_pool)
9057 return 1;
9058
9059 /* This is PC relative data after arm_reorg runs. */
9060 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9061 && reload_completed
9062 && (LABEL_REF_P (x)
9063 || (GET_CODE (x) == CONST
9064 && GET_CODE (XEXP (x, 0)) == PLUS
9065 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9066 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9067 return 1;
9068
9069 /* Post-inc indexing is only supported for SImode and larger. */
9070 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9071 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9072 return 1;
9073
9074 else if (GET_CODE (x) == PLUS)
9075 {
9076 /* REG+REG address can be any two index registers. */
9077 /* We disallow FRAME+REG addressing since we know that FRAME
9078 will be replaced with STACK, and SP relative addressing only
9079 permits SP+OFFSET. */
9080 if (GET_MODE_SIZE (mode) <= 4
9081 && XEXP (x, 0) != frame_pointer_rtx
9082 && XEXP (x, 1) != frame_pointer_rtx
9083 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9084 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9085 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9086 return 1;
9087
9088 /* REG+const has 5-7 bit offset for non-SP registers. */
9089 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9090 || XEXP (x, 0) == arg_pointer_rtx)
9091 && CONST_INT_P (XEXP (x, 1))
9092 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9093 return 1;
9094
9095 /* REG+const has 10-bit offset for SP, but only SImode and
9096 larger are supported. */
9097 /* ??? Should probably check for DI/DFmode overflow here
9098 just like GO_IF_LEGITIMATE_OFFSET does. */
9099 else if (REG_P (XEXP (x, 0))
9100 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9101 && GET_MODE_SIZE (mode) >= 4
9102 && CONST_INT_P (XEXP (x, 1))
9103 && INTVAL (XEXP (x, 1)) >= 0
9104 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9105 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9106 return 1;
9107
9108 else if (REG_P (XEXP (x, 0))
9109 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9110 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9111 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9112 && REGNO (XEXP (x, 0))
9113 <= LAST_VIRTUAL_POINTER_REGISTER))
9114 && GET_MODE_SIZE (mode) >= 4
9115 && CONST_INT_P (XEXP (x, 1))
9116 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9117 return 1;
9118 }
9119
9120 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9121 && GET_MODE_SIZE (mode) == 4
9122 && SYMBOL_REF_P (x)
9123 && CONSTANT_POOL_ADDRESS_P (x)
9124 && !arm_disable_literal_pool
9125 && ! (flag_pic
9126 && symbol_mentioned_p (get_pool_constant (x))
9127 && ! pcrel_constant_p (get_pool_constant (x))))
9128 return 1;
9129
9130 return 0;
9131 }
9132
9133 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9134 instruction of mode MODE. */
9135 int
9136 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9137 {
9138 switch (GET_MODE_SIZE (mode))
9139 {
9140 case 1:
9141 return val >= 0 && val < 32;
9142
9143 case 2:
9144 return val >= 0 && val < 64 && (val & 1) == 0;
9145
9146 default:
9147 return (val >= 0
9148 && (val + GET_MODE_SIZE (mode)) <= 128
9149 && (val & 3) == 0);
9150 }
9151 }
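/* For example, by the ranges above a QImode access accepts offsets 0..31,
   HImode 0..62 (even), SImode 0..124 (word-aligned) and DImode 0..120
   (word-aligned). */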
9152
9153 bool
9154 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9155 {
9156 if (TARGET_ARM)
9157 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9158 else if (TARGET_THUMB2)
9159 return thumb2_legitimate_address_p (mode, x, strict_p);
9160 else /* if (TARGET_THUMB1) */
9161 return thumb1_legitimate_address_p (mode, x, strict_p);
9162 }
9163
9164 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9165
9166 Given an rtx X being reloaded into a reg required to be
9167 in class CLASS, return the class of reg to actually use.
9168 In general this is just CLASS, but for the Thumb core registers and
9169 immediate constants we prefer a LO_REGS class or a subset. */
9170
9171 static reg_class_t
9172 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9173 {
9174 if (TARGET_32BIT)
9175 return rclass;
9176 else
9177 {
9178 if (rclass == GENERAL_REGS)
9179 return LO_REGS;
9180 else
9181 return rclass;
9182 }
9183 }
9184
9185 /* Build the SYMBOL_REF for __tls_get_addr. */
9186
9187 static GTY(()) rtx tls_get_addr_libfunc;
9188
9189 static rtx
9190 get_tls_get_addr (void)
9191 {
9192 if (!tls_get_addr_libfunc)
9193 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9194 return tls_get_addr_libfunc;
9195 }
9196
9197 rtx
9198 arm_load_tp (rtx target)
9199 {
9200 if (!target)
9201 target = gen_reg_rtx (SImode);
9202
9203 if (TARGET_HARD_TP)
9204 {
9205 /* Can return in any reg. */
9206 emit_insn (gen_load_tp_hard (target));
9207 }
9208 else
9209 {
9210 /* Always returned in r0. Immediately copy the result into a pseudo,
9211 otherwise other uses of r0 (e.g. setting up function arguments) may
9212 clobber the value. */
9213
9214 rtx tmp;
9215
9216 if (TARGET_FDPIC)
9217 {
9218 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9219 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9220
9221 emit_insn (gen_load_tp_soft_fdpic ());
9222
9223 /* Restore r9. */
9224 emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
9225 }
9226 else
9227 emit_insn (gen_load_tp_soft ());
9228
9229 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9230 emit_move_insn (target, tmp);
9231 }
9232 return target;
9233 }
9234
9235 static rtx
9236 load_tls_operand (rtx x, rtx reg)
9237 {
9238 rtx tmp;
9239
9240 if (reg == NULL_RTX)
9241 reg = gen_reg_rtx (SImode);
9242
9243 tmp = gen_rtx_CONST (SImode, x);
9244
9245 emit_move_insn (reg, tmp);
9246
9247 return reg;
9248 }
9249
9250 static rtx_insn *
9251 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9252 {
9253 rtx label, labelno = NULL_RTX, sum;
9254
9255 gcc_assert (reloc != TLS_DESCSEQ);
9256 start_sequence ();
9257
9258 if (TARGET_FDPIC)
9259 {
9260 sum = gen_rtx_UNSPEC (Pmode,
9261 gen_rtvec (2, x, GEN_INT (reloc)),
9262 UNSPEC_TLS);
9263 }
9264 else
9265 {
9266 labelno = GEN_INT (pic_labelno++);
9267 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9268 label = gen_rtx_CONST (VOIDmode, label);
9269
9270 sum = gen_rtx_UNSPEC (Pmode,
9271 gen_rtvec (4, x, GEN_INT (reloc), label,
9272 GEN_INT (TARGET_ARM ? 8 : 4)),
9273 UNSPEC_TLS);
9274 }
9275 reg = load_tls_operand (sum, reg);
9276
9277 if (TARGET_FDPIC)
9278 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9279 else if (TARGET_ARM)
9280 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9281 else
9282 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9283
9284 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9285 LCT_PURE, /* LCT_CONST? */
9286 Pmode, reg, Pmode);
9287
9288 rtx_insn *insns = get_insns ();
9289 end_sequence ();
9290
9291 return insns;
9292 }
9293
9294 static rtx
9295 arm_tls_descseq_addr (rtx x, rtx reg)
9296 {
9297 rtx labelno = GEN_INT (pic_labelno++);
9298 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9299 rtx sum = gen_rtx_UNSPEC (Pmode,
9300 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9301 gen_rtx_CONST (VOIDmode, label),
9302 GEN_INT (!TARGET_ARM)),
9303 UNSPEC_TLS);
9304 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9305
9306 emit_insn (gen_tlscall (x, labelno));
9307 if (!reg)
9308 reg = gen_reg_rtx (SImode);
9309 else
9310 gcc_assert (REGNO (reg) != R0_REGNUM);
9311
9312 emit_move_insn (reg, reg0);
9313
9314 return reg;
9315 }
9316
9317
9318 rtx
9319 legitimize_tls_address (rtx x, rtx reg)
9320 {
9321 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9322 rtx_insn *insns;
9323 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9324
9325 switch (model)
9326 {
9327 case TLS_MODEL_GLOBAL_DYNAMIC:
9328 if (TARGET_GNU2_TLS)
9329 {
9330 gcc_assert (!TARGET_FDPIC);
9331
9332 reg = arm_tls_descseq_addr (x, reg);
9333
9334 tp = arm_load_tp (NULL_RTX);
9335
9336 dest = gen_rtx_PLUS (Pmode, tp, reg);
9337 }
9338 else
9339 {
9340 /* Original scheme */
9341 if (TARGET_FDPIC)
9342 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9343 else
9344 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9345 dest = gen_reg_rtx (Pmode);
9346 emit_libcall_block (insns, dest, ret, x);
9347 }
9348 return dest;
9349
9350 case TLS_MODEL_LOCAL_DYNAMIC:
9351 if (TARGET_GNU2_TLS)
9352 {
9353 gcc_assert (!TARGET_FDPIC);
9354
9355 reg = arm_tls_descseq_addr (x, reg);
9356
9357 tp = arm_load_tp (NULL_RTX);
9358
9359 dest = gen_rtx_PLUS (Pmode, tp, reg);
9360 }
9361 else
9362 {
9363 if (TARGET_FDPIC)
9364 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9365 else
9366 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9367
9368 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9369 share the LDM result with other LD model accesses. */
9370 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9371 UNSPEC_TLS);
9372 dest = gen_reg_rtx (Pmode);
9373 emit_libcall_block (insns, dest, ret, eqv);
9374
9375 /* Load the addend. */
9376 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9377 GEN_INT (TLS_LDO32)),
9378 UNSPEC_TLS);
9379 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9380 dest = gen_rtx_PLUS (Pmode, dest, addend);
9381 }
9382 return dest;
9383
9384 case TLS_MODEL_INITIAL_EXEC:
9385 if (TARGET_FDPIC)
9386 {
9387 sum = gen_rtx_UNSPEC (Pmode,
9388 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9389 UNSPEC_TLS);
9390 reg = load_tls_operand (sum, reg);
9391 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9392 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9393 }
9394 else
9395 {
9396 labelno = GEN_INT (pic_labelno++);
9397 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9398 label = gen_rtx_CONST (VOIDmode, label);
9399 sum = gen_rtx_UNSPEC (Pmode,
9400 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9401 GEN_INT (TARGET_ARM ? 8 : 4)),
9402 UNSPEC_TLS);
9403 reg = load_tls_operand (sum, reg);
9404
9405 if (TARGET_ARM)
9406 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9407 else if (TARGET_THUMB2)
9408 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9409 else
9410 {
9411 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9412 emit_move_insn (reg, gen_const_mem (SImode, reg));
9413 }
9414 }
9415
9416 tp = arm_load_tp (NULL_RTX);
9417
9418 return gen_rtx_PLUS (Pmode, tp, reg);
9419
9420 case TLS_MODEL_LOCAL_EXEC:
9421 tp = arm_load_tp (NULL_RTX);
9422
9423 reg = gen_rtx_UNSPEC (Pmode,
9424 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9425 UNSPEC_TLS);
9426 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9427
9428 return gen_rtx_PLUS (Pmode, tp, reg);
9429
9430 default:
9431 abort ();
9432 }
9433 }
9434
9435 /* Try machine-dependent ways of modifying an illegitimate address
9436 to be legitimate. If we find one, return the new, valid address. */
9437 rtx
9438 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9439 {
9440 if (arm_tls_referenced_p (x))
9441 {
9442 rtx addend = NULL;
9443
9444 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9445 {
9446 addend = XEXP (XEXP (x, 0), 1);
9447 x = XEXP (XEXP (x, 0), 0);
9448 }
9449
9450 if (!SYMBOL_REF_P (x))
9451 return x;
9452
9453 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9454
9455 x = legitimize_tls_address (x, NULL_RTX);
9456
9457 if (addend)
9458 {
9459 x = gen_rtx_PLUS (SImode, x, addend);
9460 orig_x = x;
9461 }
9462 else
9463 return x;
9464 }
9465
9466 if (TARGET_THUMB1)
9467 return thumb_legitimize_address (x, orig_x, mode);
9468
9469 if (GET_CODE (x) == PLUS)
9470 {
9471 rtx xop0 = XEXP (x, 0);
9472 rtx xop1 = XEXP (x, 1);
9473
9474 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9475 xop0 = force_reg (SImode, xop0);
9476
9477 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9478 && !symbol_mentioned_p (xop1))
9479 xop1 = force_reg (SImode, xop1);
9480
9481 if (ARM_BASE_REGISTER_RTX_P (xop0)
9482 && CONST_INT_P (xop1))
9483 {
9484 HOST_WIDE_INT n, low_n;
9485 rtx base_reg, val;
9486 n = INTVAL (xop1);
9487
9488 /* VFP addressing modes actually allow greater offsets, but for
9489 now we just stick with the lowest common denominator. */
9490 if (mode == DImode || mode == DFmode)
9491 {
9492 low_n = n & 0x0f;
9493 n &= ~0x0f;
9494 if (low_n > 4)
9495 {
9496 n += 16;
9497 low_n -= 16;
9498 }
9499 }
9500 else
9501 {
9502 low_n = ((mode) == TImode ? 0
9503 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9504 n -= low_n;
9505 }
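      /* Illustrative: for DFmode and n = 0x104 the split gives low_n = 4 and
	 n = 0x100, so base_reg below holds xop0 + 0x100 and the access becomes
	 [base_reg, #4]. */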
9506
9507 base_reg = gen_reg_rtx (SImode);
9508 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9509 emit_move_insn (base_reg, val);
9510 x = plus_constant (Pmode, base_reg, low_n);
9511 }
9512 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9513 x = gen_rtx_PLUS (SImode, xop0, xop1);
9514 }
9515
9516 /* XXX We don't allow MINUS any more -- see comment in
9517 arm_legitimate_address_outer_p (). */
9518 else if (GET_CODE (x) == MINUS)
9519 {
9520 rtx xop0 = XEXP (x, 0);
9521 rtx xop1 = XEXP (x, 1);
9522
9523 if (CONSTANT_P (xop0))
9524 xop0 = force_reg (SImode, xop0);
9525
9526 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9527 xop1 = force_reg (SImode, xop1);
9528
9529 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9530 x = gen_rtx_MINUS (SImode, xop0, xop1);
9531 }
9532
9533 /* Make sure to take full advantage of the pre-indexed addressing mode
9534 with absolute addresses, which often allows the base register to be
9535 factored out across multiple adjacent memory references, and might
9536 even allow the minipool to be avoided entirely. */
9537 else if (CONST_INT_P (x) && optimize > 0)
9538 {
9539 unsigned int bits;
9540 HOST_WIDE_INT mask, base, index;
9541 rtx base_reg;
9542
9543 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9544 only use an 8-bit index. So let's use a 12-bit index for
9545 SImode only and hope that arm_gen_constant will enable LDRB
9546 to use more bits. */
9547 bits = (mode == SImode) ? 12 : 8;
9548 mask = (1 << bits) - 1;
9549 base = INTVAL (x) & ~mask;
9550 index = INTVAL (x) & mask;
9551 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9552 {
9553 /* It'll most probably be more efficient to generate the
9554 base with more bits set and use a negative index instead.
9555 Don't do this for Thumb as negative offsets are much more
9556 limited. */
9557 base |= mask;
9558 index -= mask;
9559 }
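      /* Worked example (illustrative): for SImode and x = 0x12345678 we get
	 mask = 0xfff, base = 0x12345000 and index = 0x678, so the base is
	 materialised once and the access becomes [base_reg, #0x678]. */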
9560 base_reg = force_reg (SImode, GEN_INT (base));
9561 x = plus_constant (Pmode, base_reg, index);
9562 }
9563
9564 if (flag_pic)
9565 {
9566 /* We need to find and carefully transform any SYMBOL and LABEL
9567 references; so go back to the original address expression. */
9568 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9569 false /*compute_now*/);
9570
9571 if (new_x != orig_x)
9572 x = new_x;
9573 }
9574
9575 return x;
9576 }
9577
9578
9579 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9580 to be legitimate. If we find one, return the new, valid address. */
9581 rtx
9582 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9583 {
9584 if (GET_CODE (x) == PLUS
9585 && CONST_INT_P (XEXP (x, 1))
9586 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9587 || INTVAL (XEXP (x, 1)) < 0))
9588 {
9589 rtx xop0 = XEXP (x, 0);
9590 rtx xop1 = XEXP (x, 1);
9591 HOST_WIDE_INT offset = INTVAL (xop1);
9592
9593 /* Try and fold the offset into a biasing of the base register and
9594 then offsetting that. Don't do this when optimizing for space
9595 since it can cause too many CSEs. */
9596 if (optimize_size && offset >= 0
9597 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9598 {
9599 HOST_WIDE_INT delta;
9600
9601 if (offset >= 256)
9602 delta = offset - (256 - GET_MODE_SIZE (mode));
9603 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9604 delta = 31 * GET_MODE_SIZE (mode);
9605 else
9606 delta = offset & (~31 * GET_MODE_SIZE (mode));
9607
9608 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9609 NULL_RTX);
9610 x = plus_constant (Pmode, xop0, delta);
9611 }
9612 else if (offset < 0 && offset > -256)
9613 /* Small negative offsets are best done with a subtract before the
9614 dereference; forcing these into a register normally takes two
9615 instructions. */
9616 x = force_operand (x, NULL_RTX);
9617 else
9618 {
9619 /* For the remaining cases, force the constant into a register. */
9620 xop1 = force_reg (SImode, xop1);
9621 x = gen_rtx_PLUS (SImode, xop0, xop1);
9622 }
9623 }
9624 else if (GET_CODE (x) == PLUS
9625 && s_register_operand (XEXP (x, 1), SImode)
9626 && !s_register_operand (XEXP (x, 0), SImode))
9627 {
9628 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9629
9630 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9631 }
9632
9633 if (flag_pic)
9634 {
9635 /* We need to find and carefully transform any SYMBOL and LABEL
9636 references; so go back to the original address expression. */
9637 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9638 false /*compute_now*/);
9639
9640 if (new_x != orig_x)
9641 x = new_x;
9642 }
9643
9644 return x;
9645 }
9646
9647 /* Return TRUE if X contains any TLS symbol references. */
9648
9649 bool
9650 arm_tls_referenced_p (rtx x)
9651 {
9652 if (! TARGET_HAVE_TLS)
9653 return false;
9654
9655 subrtx_iterator::array_type array;
9656 FOR_EACH_SUBRTX (iter, array, x, ALL)
9657 {
9658 const_rtx x = *iter;
9659 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9660 {
9661 /* ARM currently does not provide relocations to encode TLS variables
9662 into AArch32 instructions, only into data, so there is currently no
9663 way to implement these if the literal pool is disabled. */
9664 if (arm_disable_literal_pool)
9665 sorry ("accessing thread-local storage is not currently supported "
9666 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9667
9668 return true;
9669 }
9670
9671 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9672 TLS offsets, not real symbol references. */
9673 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9674 iter.skip_subrtxes ();
9675 }
9676 return false;
9677 }
9678
9679 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9680
9681 On the ARM, allow any integer (invalid ones are removed later by insn
9682 patterns), nice doubles and symbol_refs which refer to the function's
9683 constant pool XXX.
9684
9685 When generating pic allow anything. */
9686
9687 static bool
9688 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9689 {
9690 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9691 return false;
9692
9693 return flag_pic || !label_mentioned_p (x);
9694 }
9695
9696 static bool
9697 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9698 {
9699 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates a HIGH
9700 RTX. Such RTXs must therefore be allowed for Thumb-1 so that, when run
9701 for ARMv8-M Baseline or later, the result is valid. */
9702 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9703 x = XEXP (x, 0);
9704
9705 return (CONST_INT_P (x)
9706 || CONST_DOUBLE_P (x)
9707 || CONSTANT_ADDRESS_P (x)
9708 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9709 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9710 we build the symbol address with upper/lower
9711 relocations. */
9712 || (TARGET_THUMB1
9713 && !label_mentioned_p (x)
9714 && arm_valid_symbolic_address_p (x)
9715 && arm_disable_literal_pool)
9716 || flag_pic);
9717 }
9718
9719 static bool
9720 arm_legitimate_constant_p (machine_mode mode, rtx x)
9721 {
9722 return (!arm_cannot_force_const_mem (mode, x)
9723 && (TARGET_32BIT
9724 ? arm_legitimate_constant_p_1 (mode, x)
9725 : thumb_legitimate_constant_p (mode, x)));
9726 }
9727
9728 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9729
9730 static bool
9731 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9732 {
9733 rtx base, offset;
9734 split_const (x, &base, &offset);
9735
9736 if (SYMBOL_REF_P (base))
9737 {
9738 /* Function symbols cannot have an offset due to the Thumb bit. */
9739 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9740 && INTVAL (offset) != 0)
9741 return true;
9742
9743 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9744 && !offset_within_block_p (base, INTVAL (offset)))
9745 return true;
9746 }
9747 return arm_tls_referenced_p (x);
9748 }
9749 \f
9750 #define REG_OR_SUBREG_REG(X) \
9751 (REG_P (X) \
9752 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9753
9754 #define REG_OR_SUBREG_RTX(X) \
9755 (REG_P (X) ? (X) : SUBREG_REG (X))
9756
9757 static inline int
9758 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9759 {
9760 machine_mode mode = GET_MODE (x);
9761 int total, words;
9762
9763 switch (code)
9764 {
9765 case ASHIFT:
9766 case ASHIFTRT:
9767 case LSHIFTRT:
9768 case ROTATERT:
9769 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9770
9771 case PLUS:
9772 case MINUS:
9773 case COMPARE:
9774 case NEG:
9775 case NOT:
9776 return COSTS_N_INSNS (1);
9777
9778 case MULT:
9779 if (arm_arch6m && arm_m_profile_small_mul)
9780 return COSTS_N_INSNS (32);
9781
9782 if (CONST_INT_P (XEXP (x, 1)))
9783 {
9784 int cycles = 0;
9785 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9786
9787 while (i)
9788 {
9789 i >>= 2;
9790 cycles++;
9791 }
9792 return COSTS_N_INSNS (2) + cycles;
9793 }
9794 return COSTS_N_INSNS (1) + 16;
9795
9796 case SET:
9797 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9798 the mode. */
9799 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9800 return (COSTS_N_INSNS (words)
9801 + 4 * ((MEM_P (SET_SRC (x)))
9802 + MEM_P (SET_DEST (x))));
9803
9804 case CONST_INT:
9805 if (outer == SET)
9806 {
9807 if (UINTVAL (x) < 256
9808 /* 16-bit constant. */
9809 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9810 return 0;
9811 if (thumb_shiftable_const (INTVAL (x)))
9812 return COSTS_N_INSNS (2);
9813 return arm_disable_literal_pool
9814 ? COSTS_N_INSNS (8)
9815 : COSTS_N_INSNS (3);
9816 }
9817 else if ((outer == PLUS || outer == COMPARE)
9818 && INTVAL (x) < 256 && INTVAL (x) > -256)
9819 return 0;
9820 else if ((outer == IOR || outer == XOR || outer == AND)
9821 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9822 return COSTS_N_INSNS (1);
9823 else if (outer == AND)
9824 {
9825 int i;
9826 /* This duplicates the tests in the andsi3 expander. */
9827 for (i = 9; i <= 31; i++)
9828 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9829 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9830 return COSTS_N_INSNS (2);
9831 }
9832 else if (outer == ASHIFT || outer == ASHIFTRT
9833 || outer == LSHIFTRT)
9834 return 0;
9835 return COSTS_N_INSNS (2);
9836
9837 case CONST:
9838 case CONST_DOUBLE:
9839 case LABEL_REF:
9840 case SYMBOL_REF:
9841 return COSTS_N_INSNS (3);
9842
9843 case UDIV:
9844 case UMOD:
9845 case DIV:
9846 case MOD:
9847 return 100;
9848
9849 case TRUNCATE:
9850 return 99;
9851
9852 case AND:
9853 case XOR:
9854 case IOR:
9855 /* XXX guess. */
9856 return 8;
9857
9858 case MEM:
9859 /* XXX another guess. */
9860 /* Memory costs quite a lot for the first word, but subsequent words
9861 load at the equivalent of a single insn each. */
9862 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9863 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9864 ? 4 : 0));
9865
9866 case IF_THEN_ELSE:
9867 /* XXX a guess. */
9868 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9869 return 14;
9870 return 2;
9871
9872 case SIGN_EXTEND:
9873 case ZERO_EXTEND:
9874 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9875 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9876
9877 if (mode == SImode)
9878 return total;
9879
9880 if (arm_arch6)
9881 return total + COSTS_N_INSNS (1);
9882
9883 /* Assume a two-shift sequence. Increase the cost slightly so
9884 we prefer actual shifts over an extend operation. */
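      /* E.g. sign-extending a byte without a native sxtb is typically
	     lsls rd, rs, #24
	     asrs rd, rd, #24
	 (illustrative). */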
9885 return total + 1 + COSTS_N_INSNS (2);
9886
9887 default:
9888 return 99;
9889 }
9890 }
9891
9892 /* Estimate the size cost of Thumb-1 instructions.
9893 For now most of the code is copied from thumb1_rtx_costs; we need more
9894 fine-grained tuning when we have more related test cases. */
9895 static inline int
9896 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9897 {
9898 machine_mode mode = GET_MODE (x);
9899 int words, cost;
9900
9901 switch (code)
9902 {
9903 case ASHIFT:
9904 case ASHIFTRT:
9905 case LSHIFTRT:
9906 case ROTATERT:
9907 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9908
9909 case PLUS:
9910 case MINUS:
9911 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9912 operations generated by RTL expansion, especially for the expansion of
9913 multiplication. */
9914 if ((GET_CODE (XEXP (x, 0)) == MULT
9915 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9916 || (GET_CODE (XEXP (x, 1)) == MULT
9917 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9918 return COSTS_N_INSNS (2);
9919 /* Fall through. */
9920 case COMPARE:
9921 case NEG:
9922 case NOT:
9923 return COSTS_N_INSNS (1);
9924
9925 case MULT:
9926 if (CONST_INT_P (XEXP (x, 1)))
9927 {
9928 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9929 into a register first. */
9930 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9931 /* For targets which have a very small and high-latency multiply
9932 unit, we prefer to synthesize the multiplication with up to 5 instructions,
9933 giving a good balance between size and performance. */
9934 if (arm_arch6m && arm_m_profile_small_mul)
9935 return COSTS_N_INSNS (5);
9936 else
9937 return COSTS_N_INSNS (1) + const_size;
9938 }
9939 return COSTS_N_INSNS (1);
9940
9941 case SET:
9942 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9943 the mode. */
9944 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9945 cost = COSTS_N_INSNS (words);
9946 if (satisfies_constraint_J (SET_SRC (x))
9947 || satisfies_constraint_K (SET_SRC (x))
9948 /* Too big an immediate for a 2-byte mov, using MOVT. */
9949 || (CONST_INT_P (SET_SRC (x))
9950 && UINTVAL (SET_SRC (x)) >= 256
9951 && TARGET_HAVE_MOVT
9952 && satisfies_constraint_j (SET_SRC (x)))
9953 /* thumb1_movdi_insn. */
9954 || ((words > 1) && MEM_P (SET_SRC (x))))
9955 cost += COSTS_N_INSNS (1);
9956 return cost;
9957
9958 case CONST_INT:
9959 if (outer == SET)
9960 {
9961 if (UINTVAL (x) < 256)
9962 return COSTS_N_INSNS (1);
9963 /* movw is 4byte long. */
9964 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9965 return COSTS_N_INSNS (2);
9966 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9967 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9968 return COSTS_N_INSNS (2);
9969 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9970 if (thumb_shiftable_const (INTVAL (x)))
9971 return COSTS_N_INSNS (2);
9972 return arm_disable_literal_pool
9973 ? COSTS_N_INSNS (8)
9974 : COSTS_N_INSNS (3);
9975 }
9976 else if ((outer == PLUS || outer == COMPARE)
9977 && INTVAL (x) < 256 && INTVAL (x) > -256)
9978 return 0;
9979 else if ((outer == IOR || outer == XOR || outer == AND)
9980 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9981 return COSTS_N_INSNS (1);
9982 else if (outer == AND)
9983 {
9984 int i;
9985 /* This duplicates the tests in the andsi3 expander. */
9986 for (i = 9; i <= 31; i++)
9987 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9988 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9989 return COSTS_N_INSNS (2);
9990 }
9991 else if (outer == ASHIFT || outer == ASHIFTRT
9992 || outer == LSHIFTRT)
9993 return 0;
9994 return COSTS_N_INSNS (2);
9995
9996 case CONST:
9997 case CONST_DOUBLE:
9998 case LABEL_REF:
9999 case SYMBOL_REF:
10000 return COSTS_N_INSNS (3);
10001
10002 case UDIV:
10003 case UMOD:
10004 case DIV:
10005 case MOD:
10006 return 100;
10007
10008 case TRUNCATE:
10009 return 99;
10010
10011 case AND:
10012 case XOR:
10013 case IOR:
10014 return COSTS_N_INSNS (1);
10015
10016 case MEM:
10017 return (COSTS_N_INSNS (1)
10018 + COSTS_N_INSNS (1)
10019 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10020 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10021 ? COSTS_N_INSNS (1) : 0));
10022
10023 case IF_THEN_ELSE:
10024 /* XXX a guess. */
10025 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10026 return 14;
10027 return 2;
10028
10029 case ZERO_EXTEND:
10030 /* XXX still guessing. */
10031 switch (GET_MODE (XEXP (x, 0)))
10032 {
10033 case E_QImode:
10034 return (1 + (mode == DImode ? 4 : 0)
10035 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10036
10037 case E_HImode:
10038 return (4 + (mode == DImode ? 4 : 0)
10039 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10040
10041 case E_SImode:
10042 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10043
10044 default:
10045 return 99;
10046 }
10047
10048 default:
10049 return 99;
10050 }
10051 }
10052
10053 /* Helper function for arm_rtx_costs. If one operand of OP, a
10054 PLUS, is the carry flag, then return the other operand. If
10055 neither operand is a carry, return OP unchanged. */
10056 static rtx
10057 strip_carry_operation (rtx op)
10058 {
10059 gcc_assert (GET_CODE (op) == PLUS);
10060 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10061 return XEXP (op, 1);
10062 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10063 return XEXP (op, 0);
10064 return op;
10065 }
10066
10067 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10068 operand, then return the operand that is being shifted. If the shift
10069 is not by a constant, then set SHIFT_REG to point to the operand.
10070 Return NULL if OP is not a shifter operand. */
10071 static rtx
10072 shifter_op_p (rtx op, rtx *shift_reg)
10073 {
10074 enum rtx_code code = GET_CODE (op);
10075
10076 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10077 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10078 return XEXP (op, 0);
10079 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10080 return XEXP (op, 0);
10081 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10082 || code == ASHIFTRT)
10083 {
10084 if (!CONST_INT_P (XEXP (op, 1)))
10085 *shift_reg = XEXP (op, 1);
10086 return XEXP (op, 0);
10087 }
10088
10089 return NULL;
10090 }
10091
10092 static bool
10093 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10094 {
10095 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10096 rtx_code code = GET_CODE (x);
10097 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10098
10099 switch (XINT (x, 1))
10100 {
10101 case UNSPEC_UNALIGNED_LOAD:
10102 /* We can only do unaligned loads into the integer unit, and we can't
10103 use LDM or LDRD. */
10104 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10105 if (speed_p)
10106 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10107 + extra_cost->ldst.load_unaligned);
10108
10109 #ifdef NOT_YET
10110 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10111 ADDR_SPACE_GENERIC, speed_p);
10112 #endif
10113 return true;
10114
10115 case UNSPEC_UNALIGNED_STORE:
10116 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10117 if (speed_p)
10118 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10119 + extra_cost->ldst.store_unaligned);
10120
10121 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10122 #ifdef NOT_YET
10123 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10124 ADDR_SPACE_GENERIC, speed_p);
10125 #endif
10126 return true;
10127
10128 case UNSPEC_VRINTZ:
10129 case UNSPEC_VRINTP:
10130 case UNSPEC_VRINTM:
10131 case UNSPEC_VRINTR:
10132 case UNSPEC_VRINTX:
10133 case UNSPEC_VRINTA:
10134 if (speed_p)
10135 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10136
10137 return true;
10138 default:
10139 *cost = COSTS_N_INSNS (2);
10140 break;
10141 }
10142 return true;
10143 }
10144
10145 /* Cost of a libcall. We assume one insn per argument, an amount for the
10146 call (one insn for -Os) and then one for processing the result. */
10147 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
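/* E.g. (illustrative) LIBCALL_COST (2) is COSTS_N_INSNS (20) when speed_p
   and COSTS_N_INSNS (4) otherwise. */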
10148
10149 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10150 do \
10151 { \
10152 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10153 if (shift_op != NULL \
10154 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10155 { \
10156 if (shift_reg) \
10157 { \
10158 if (speed_p) \
10159 *cost += extra_cost->alu.arith_shift_reg; \
10160 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10161 ASHIFT, 1, speed_p); \
10162 } \
10163 else if (speed_p) \
10164 *cost += extra_cost->alu.arith_shift; \
10165 \
10166 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10167 ASHIFT, 0, speed_p) \
10168 + rtx_cost (XEXP (x, 1 - IDX), \
10169 GET_MODE (shift_op), \
10170 OP, 1, speed_p)); \
10171 return true; \
10172 } \
10173 } \
10174 while (0)
10175
10176 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10177 considering the costs of the addressing mode and memory access
10178 separately. */
10179 static bool
10180 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10181 int *cost, bool speed_p)
10182 {
10183 machine_mode mode = GET_MODE (x);
10184
10185 *cost = COSTS_N_INSNS (1);
10186
10187 if (flag_pic
10188 && GET_CODE (XEXP (x, 0)) == PLUS
10189 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10190 /* This will be split into two instructions. Add the cost of the
10191 additional instruction here. The cost of the memory access is computed
10192 below. See arm.md:calculate_pic_address. */
10193 *cost += COSTS_N_INSNS (1);
10194
10195 /* Calculate cost of the addressing mode. */
10196 if (speed_p)
10197 {
10198 arm_addr_mode_op op_type;
10199 switch (GET_CODE (XEXP (x, 0)))
10200 {
10201 default:
10202 case REG:
10203 op_type = AMO_DEFAULT;
10204 break;
10205 case MINUS:
10206 /* MINUS addresses do not normally appear in the RTL, but the
10207 architecture supports them, so handle this case defensively. */
10208 /* fall through */
10209 case PLUS:
10210 op_type = AMO_NO_WB;
10211 break;
10212 case PRE_INC:
10213 case PRE_DEC:
10214 case POST_INC:
10215 case POST_DEC:
10216 case PRE_MODIFY:
10217 case POST_MODIFY:
10218 op_type = AMO_WB;
10219 break;
10220 }
10221
10222 if (VECTOR_MODE_P (mode))
10223 *cost += current_tune->addr_mode_costs->vector[op_type];
10224 else if (FLOAT_MODE_P (mode))
10225 *cost += current_tune->addr_mode_costs->fp[op_type];
10226 else
10227 *cost += current_tune->addr_mode_costs->integer[op_type];
10228 }
10229
10230 /* Calculate cost of memory access. */
10231 if (speed_p)
10232 {
10233 if (FLOAT_MODE_P (mode))
10234 {
10235 if (GET_MODE_SIZE (mode) == 8)
10236 *cost += extra_cost->ldst.loadd;
10237 else
10238 *cost += extra_cost->ldst.loadf;
10239 }
10240 else if (VECTOR_MODE_P (mode))
10241 *cost += extra_cost->ldst.loadv;
10242 else
10243 {
10244 /* Integer modes */
10245 if (GET_MODE_SIZE (mode) == 8)
10246 *cost += extra_cost->ldst.ldrd;
10247 else
10248 *cost += extra_cost->ldst.load;
10249 }
10250 }
10251
10252 return true;
10253 }
10254
10255 /* Helper for arm_bfi_p. */
10256 static bool
10257 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10258 {
10259 unsigned HOST_WIDE_INT const1;
10260 unsigned HOST_WIDE_INT const2 = 0;
10261
10262 if (!CONST_INT_P (XEXP (op0, 1)))
10263 return false;
10264
10265 const1 = UINTVAL (XEXP (op0, 1));
10266 if (!CONST_INT_P (XEXP (op1, 1))
10267 || ~UINTVAL (XEXP (op1, 1)) != const1)
10268 return false;
10269
10270 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10271 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10272 {
10273 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10274 *sub0 = XEXP (XEXP (op0, 0), 0);
10275 }
10276 else
10277 *sub0 = XEXP (op0, 0);
10278
10279 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10280 return false;
10281
10282 *sub1 = XEXP (op1, 0);
10283 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10284 }
10285
10286 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10287 format looks something like:
10288
10289 (IOR (AND (reg1) (~const1))
10290 (AND (ASHIFT (reg2) (const2))
10291 (const1)))
10292
10293 where const1 is a consecutive sequence of 1-bits with the
10294 least-significant non-zero bit starting at bit position const2. If
10295 const2 is zero, then the shift will not appear at all, due to
10296 canonicalization. The two arms of the IOR expression may be
10297 flipped. */
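/* A concrete (illustrative) instance: with const1 = 0xff00 and const2 = 8
   the pattern computes (reg1 & ~0xff00) | ((reg2 << 8) & 0xff00), i.e. it
   inserts the low byte of reg2 into bits 8..15 of reg1, which is exactly
   BFI reg1, reg2, #8, #8. */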
10298 static bool
10299 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10300 {
10301 if (GET_CODE (x) != IOR)
10302 return false;
10303 if (GET_CODE (XEXP (x, 0)) != AND
10304 || GET_CODE (XEXP (x, 1)) != AND)
10305 return false;
10306 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10307 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10308 }
10309
10310 /* RTX costs. Make an estimate of the cost of executing the operation
10311 X, which is contained within an operation with code OUTER_CODE.
10312 SPEED_P indicates whether the cost desired is the performance cost,
10313 or the size cost. The estimate is stored in COST and the return
10314 value is TRUE if the cost calculation is final, or FALSE if the
10315 caller should recurse through the operands of X to add additional
10316 costs.
10317
10318 We currently make no attempt to model the size savings of Thumb-2
10319 16-bit instructions. At the normal points in compilation where
10320 this code is called we have no measure of whether the condition
10321 flags are live or not, and thus no realistic way to determine what
10322 the size will eventually be. */
10323 static bool
10324 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10325 const struct cpu_cost_table *extra_cost,
10326 int *cost, bool speed_p)
10327 {
10328 machine_mode mode = GET_MODE (x);
10329
10330 *cost = COSTS_N_INSNS (1);
10331
10332 if (TARGET_THUMB1)
10333 {
10334 if (speed_p)
10335 *cost = thumb1_rtx_costs (x, code, outer_code);
10336 else
10337 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10338 return true;
10339 }
10340
10341 switch (code)
10342 {
10343 case SET:
10344 *cost = 0;
10345 /* SET RTXs don't have a mode so we get it from the destination. */
10346 mode = GET_MODE (SET_DEST (x));
10347
10348 if (REG_P (SET_SRC (x))
10349 && REG_P (SET_DEST (x)))
10350 {
10351 /* Assume that most copies can be done with a single insn,
10352 unless we don't have HW FP, in which case everything
10353 larger than word mode will require two insns. */
10354 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10355 && GET_MODE_SIZE (mode) > 4)
10356 || mode == DImode)
10357 ? 2 : 1);
10358 /* Conditional register moves can be encoded
10359 in 16 bits in Thumb mode. */
10360 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10361 *cost >>= 1;
10362
10363 return true;
10364 }
10365
10366 if (CONST_INT_P (SET_SRC (x)))
10367 {
10368 /* Handle CONST_INT here, since the value doesn't have a mode
10369 and we would otherwise be unable to work out the true cost. */
10370 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10371 0, speed_p);
10372 outer_code = SET;
10373 /* Slightly lower the cost of setting a core reg to a constant.
10374 This helps break up chains and allows for better scheduling. */
10375 if (REG_P (SET_DEST (x))
10376 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10377 *cost -= 1;
10378 x = SET_SRC (x);
10379 /* Immediate moves with an immediate in the range [0, 255] can be
10380 encoded in 16 bits in Thumb mode. */
10381 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10382 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10383 *cost >>= 1;
10384 goto const_int_cost;
10385 }
10386
10387 return false;
10388
10389 case MEM:
10390 return arm_mem_costs (x, extra_cost, cost, speed_p);
10391
10392 case PARALLEL:
10393 {
10394 /* Calculations of LDM costs are complex. We assume an initial cost
10395 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
10396 registers; each additional group of ldm_regs_per_insn_subsequent
10397 registers then costs one more insn. The
10398 formula for N regs is thus:
10399
10400 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10401 + ldm_regs_per_insn_subsequent - 1)
10402 / ldm_regs_per_insn_subsequent).
10403
10404 Additional costs may also be added for addressing. A similar
10405 formula is used for STM. */
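/* As a worked example (hypothetical tuning values): with
   ldm_regs_per_insn_1st == 3 and ldm_regs_per_insn_subsequent == 2,
   an 8-register LDM is costed at
   ldm_1st + COSTS_N_INSNS ((MAX (8 - 3, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (3). */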
10406
10407 bool is_ldm = load_multiple_operation (x, SImode);
10408 bool is_stm = store_multiple_operation (x, SImode);
10409
10410 if (is_ldm || is_stm)
10411 {
10412 if (speed_p)
10413 {
10414 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10415 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10416 ? extra_cost->ldst.ldm_regs_per_insn_1st
10417 : extra_cost->ldst.stm_regs_per_insn_1st;
10418 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10419 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10420 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10421
10422 *cost += regs_per_insn_1st
10423 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10424 + regs_per_insn_sub - 1)
10425 / regs_per_insn_sub);
10426 return true;
10427 }
10428
10429 }
10430 return false;
10431 }
10432 case DIV:
10433 case UDIV:
10434 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10435 && (mode == SFmode || !TARGET_VFP_SINGLE))
10436 *cost += COSTS_N_INSNS (speed_p
10437 ? extra_cost->fp[mode != SFmode].div : 0);
10438 else if (mode == SImode && TARGET_IDIV)
10439 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10440 else
10441 *cost = LIBCALL_COST (2);
10442
10443 /* Make sdiv more expensive so that when both sdiv and udiv are
10444 possible, udiv is preferred. */
10445 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10446 return false; /* All arguments must be in registers. */
10447
10448 case MOD:
10449 /* MOD by a power of 2 can be expanded as:
10450 rsbs r1, r0, #0
10451 and r0, r0, #(n - 1)
10452 and r1, r1, #(n - 1)
10453 rsbpl r0, r1, #0. */
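/* For instance (illustrative only), x % 16 uses #15 as the mask in both
   AND instructions of the sequence above. */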
10454 if (CONST_INT_P (XEXP (x, 1))
10455 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10456 && mode == SImode)
10457 {
10458 *cost += COSTS_N_INSNS (3);
10459
10460 if (speed_p)
10461 *cost += 2 * extra_cost->alu.logical
10462 + extra_cost->alu.arith;
10463 return true;
10464 }
10465
10466 /* Fall-through. */
10467 case UMOD:
10468 /* Make sdiv more expensive so that when both sdiv and udiv are
10469 possible, udiv is preferred. */
10470 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10471 return false; /* All arguments must be in registers. */
10472
10473 case ROTATE:
10474 if (mode == SImode && REG_P (XEXP (x, 1)))
10475 {
10476 *cost += (COSTS_N_INSNS (1)
10477 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10478 if (speed_p)
10479 *cost += extra_cost->alu.shift_reg;
10480 return true;
10481 }
10482 /* Fall through */
10483 case ROTATERT:
10484 case ASHIFT:
10485 case LSHIFTRT:
10486 case ASHIFTRT:
10487 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10488 {
10489 *cost += (COSTS_N_INSNS (2)
10490 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10491 if (speed_p)
10492 *cost += 2 * extra_cost->alu.shift;
10493 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10494 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10495 *cost += 1;
10496 return true;
10497 }
10498 else if (mode == SImode)
10499 {
10500 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10501 /* Slightly disparage register shifts at -Os, but not by much. */
10502 if (!CONST_INT_P (XEXP (x, 1)))
10503 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10504 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10505 return true;
10506 }
10507 else if (GET_MODE_CLASS (mode) == MODE_INT
10508 && GET_MODE_SIZE (mode) < 4)
10509 {
10510 if (code == ASHIFT)
10511 {
10512 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10513 /* Slightly disparage register shifts at -Os, but not by
10514 much. */
10515 if (!CONST_INT_P (XEXP (x, 1)))
10516 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10517 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10518 }
10519 else if (code == LSHIFTRT || code == ASHIFTRT)
10520 {
10521 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10522 {
10523 /* Can use SBFX/UBFX. */
10524 if (speed_p)
10525 *cost += extra_cost->alu.bfx;
10526 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10527 }
10528 else
10529 {
10530 *cost += COSTS_N_INSNS (1);
10531 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10532 if (speed_p)
10533 {
10534 if (CONST_INT_P (XEXP (x, 1)))
10535 *cost += 2 * extra_cost->alu.shift;
10536 else
10537 *cost += (extra_cost->alu.shift
10538 + extra_cost->alu.shift_reg);
10539 }
10540 else
10541 /* Slightly disparage register shifts. */
10542 *cost += !CONST_INT_P (XEXP (x, 1));
10543 }
10544 }
10545 else /* Rotates. */
10546 {
10547 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10548 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10549 if (speed_p)
10550 {
10551 if (CONST_INT_P (XEXP (x, 1)))
10552 *cost += (2 * extra_cost->alu.shift
10553 + extra_cost->alu.log_shift);
10554 else
10555 *cost += (extra_cost->alu.shift
10556 + extra_cost->alu.shift_reg
10557 + extra_cost->alu.log_shift_reg);
10558 }
10559 }
10560 return true;
10561 }
10562
10563 *cost = LIBCALL_COST (2);
10564 return false;
10565
10566 case BSWAP:
10567 if (arm_arch6)
10568 {
10569 if (mode == SImode)
10570 {
10571 if (speed_p)
10572 *cost += extra_cost->alu.rev;
10573
10574 return false;
10575 }
10576 }
10577 else
10578 {
10579 /* No rev instruction available. Look at arm_legacy_rev
10580 and thumb_legacy_rev for the form of RTL used then. */
10581 if (TARGET_THUMB)
10582 {
10583 *cost += COSTS_N_INSNS (9);
10584
10585 if (speed_p)
10586 {
10587 *cost += 6 * extra_cost->alu.shift;
10588 *cost += 3 * extra_cost->alu.logical;
10589 }
10590 }
10591 else
10592 {
10593 *cost += COSTS_N_INSNS (4);
10594
10595 if (speed_p)
10596 {
10597 *cost += 2 * extra_cost->alu.shift;
10598 *cost += extra_cost->alu.arith_shift;
10599 *cost += 2 * extra_cost->alu.logical;
10600 }
10601 }
10602 return true;
10603 }
10604 return false;
10605
10606 case MINUS:
10607 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10608 && (mode == SFmode || !TARGET_VFP_SINGLE))
10609 {
10610 if (GET_CODE (XEXP (x, 0)) == MULT
10611 || GET_CODE (XEXP (x, 1)) == MULT)
10612 {
10613 rtx mul_op0, mul_op1, sub_op;
10614
10615 if (speed_p)
10616 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10617
10618 if (GET_CODE (XEXP (x, 0)) == MULT)
10619 {
10620 mul_op0 = XEXP (XEXP (x, 0), 0);
10621 mul_op1 = XEXP (XEXP (x, 0), 1);
10622 sub_op = XEXP (x, 1);
10623 }
10624 else
10625 {
10626 mul_op0 = XEXP (XEXP (x, 1), 0);
10627 mul_op1 = XEXP (XEXP (x, 1), 1);
10628 sub_op = XEXP (x, 0);
10629 }
10630
10631 /* The first operand of the multiply may be optionally
10632 negated. */
10633 if (GET_CODE (mul_op0) == NEG)
10634 mul_op0 = XEXP (mul_op0, 0);
10635
10636 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10637 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10638 + rtx_cost (sub_op, mode, code, 0, speed_p));
10639
10640 return true;
10641 }
10642
10643 if (speed_p)
10644 *cost += extra_cost->fp[mode != SFmode].addsub;
10645 return false;
10646 }
10647
10648 if (mode == SImode)
10649 {
10650 rtx shift_by_reg = NULL;
10651 rtx shift_op;
10652 rtx non_shift_op;
10653 rtx op0 = XEXP (x, 0);
10654 rtx op1 = XEXP (x, 1);
10655
10656 /* Factor out any borrow operation. There's more than one way
10657 of expressing this; try to recognize them all. */
10658 if (GET_CODE (op0) == MINUS)
10659 {
10660 if (arm_borrow_operation (op1, SImode))
10661 {
10662 op1 = XEXP (op0, 1);
10663 op0 = XEXP (op0, 0);
10664 }
10665 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10666 op0 = XEXP (op0, 0);
10667 }
10668 else if (GET_CODE (op1) == PLUS
10669 && arm_borrow_operation (XEXP (op1, 0), SImode))
10670 op1 = XEXP (op1, 0);
10671 else if (GET_CODE (op0) == NEG
10672 && arm_borrow_operation (op1, SImode))
10673 {
10674 /* Negate with carry-in. For Thumb2 this is done with
10675 SBC R, X, X lsl #1 (i.e. X - 2X - C), as Thumb lacks the
10676 RSC instruction that exists in Arm mode. */
10677 if (speed_p)
10678 *cost += (TARGET_THUMB2
10679 ? extra_cost->alu.arith_shift
10680 : extra_cost->alu.arith);
10681 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10682 return true;
10683 }
10684 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10685 Note we do mean ~borrow here. */
10686 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10687 {
10688 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10689 return true;
10690 }
10691
10692 shift_op = shifter_op_p (op0, &shift_by_reg);
10693 if (shift_op == NULL)
10694 {
10695 shift_op = shifter_op_p (op1, &shift_by_reg);
10696 non_shift_op = op0;
10697 }
10698 else
10699 non_shift_op = op1;
10700
10701 if (shift_op != NULL)
10702 {
10703 if (shift_by_reg != NULL)
10704 {
10705 if (speed_p)
10706 *cost += extra_cost->alu.arith_shift_reg;
10707 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10708 }
10709 else if (speed_p)
10710 *cost += extra_cost->alu.arith_shift;
10711
10712 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10713 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10714 return true;
10715 }
10716
10717 if (arm_arch_thumb2
10718 && GET_CODE (XEXP (x, 1)) == MULT)
10719 {
10720 /* MLS. */
10721 if (speed_p)
10722 *cost += extra_cost->mult[0].add;
10723 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10724 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10725 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10726 return true;
10727 }
10728
10729 if (CONST_INT_P (op0))
10730 {
10731 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10732 INTVAL (op0), NULL_RTX,
10733 NULL_RTX, 1, 0);
10734 *cost = COSTS_N_INSNS (insns);
10735 if (speed_p)
10736 *cost += insns * extra_cost->alu.arith;
10737 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10738 return true;
10739 }
10740 else if (speed_p)
10741 *cost += extra_cost->alu.arith;
10742
10743 /* Don't recurse as we don't want to cost any borrow that
10744 we've stripped. */
10745 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10746 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10747 return true;
10748 }
10749
10750 if (GET_MODE_CLASS (mode) == MODE_INT
10751 && GET_MODE_SIZE (mode) < 4)
10752 {
10753 rtx shift_op, shift_reg;
10754 shift_reg = NULL;
10755
10756 /* We check both sides of the MINUS for shifter operands since,
10757 unlike PLUS, it's not commutative. */
10758
10759 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10760 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10761
10762 /* Slightly disparage, as we might need to widen the result. */
10763 *cost += 1;
10764 if (speed_p)
10765 *cost += extra_cost->alu.arith;
10766
10767 if (CONST_INT_P (XEXP (x, 0)))
10768 {
10769 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10770 return true;
10771 }
10772
10773 return false;
10774 }
10775
10776 if (mode == DImode)
10777 {
10778 *cost += COSTS_N_INSNS (1);
10779
10780 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10781 {
10782 rtx op1 = XEXP (x, 1);
10783
10784 if (speed_p)
10785 *cost += 2 * extra_cost->alu.arith;
10786
10787 if (GET_CODE (op1) == ZERO_EXTEND)
10788 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10789 0, speed_p);
10790 else
10791 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10792 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10793 0, speed_p);
10794 return true;
10795 }
10796 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10797 {
10798 if (speed_p)
10799 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10800 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10801 0, speed_p)
10802 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10803 return true;
10804 }
10805 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10806 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10807 {
10808 if (speed_p)
10809 *cost += (extra_cost->alu.arith
10810 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10811 ? extra_cost->alu.arith
10812 : extra_cost->alu.arith_shift));
10813 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10814 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10815 GET_CODE (XEXP (x, 1)), 0, speed_p));
10816 return true;
10817 }
10818
10819 if (speed_p)
10820 *cost += 2 * extra_cost->alu.arith;
10821 return false;
10822 }
10823
10824 /* Vector mode? */
10825
10826 *cost = LIBCALL_COST (2);
10827 return false;
10828
10829 case PLUS:
10830 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10831 && (mode == SFmode || !TARGET_VFP_SINGLE))
10832 {
10833 if (GET_CODE (XEXP (x, 0)) == MULT)
10834 {
10835 rtx mul_op0, mul_op1, add_op;
10836
10837 if (speed_p)
10838 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10839
10840 mul_op0 = XEXP (XEXP (x, 0), 0);
10841 mul_op1 = XEXP (XEXP (x, 0), 1);
10842 add_op = XEXP (x, 1);
10843
10844 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10845 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10846 + rtx_cost (add_op, mode, code, 0, speed_p));
10847
10848 return true;
10849 }
10850
10851 if (speed_p)
10852 *cost += extra_cost->fp[mode != SFmode].addsub;
10853 return false;
10854 }
10855 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10856 {
10857 *cost = LIBCALL_COST (2);
10858 return false;
10859 }
10860
10861 /* Narrow modes can be synthesized in SImode, but the range
10862 of useful sub-operations is limited. Check for shift operations
10863 on one of the operands. Only left shifts can be used in the
10864 narrow modes. */
10865 if (GET_MODE_CLASS (mode) == MODE_INT
10866 && GET_MODE_SIZE (mode) < 4)
10867 {
10868 rtx shift_op, shift_reg;
10869 shift_reg = NULL;
10870
10871 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10872
10873 if (CONST_INT_P (XEXP (x, 1)))
10874 {
10875 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10876 INTVAL (XEXP (x, 1)), NULL_RTX,
10877 NULL_RTX, 1, 0);
10878 *cost = COSTS_N_INSNS (insns);
10879 if (speed_p)
10880 *cost += insns * extra_cost->alu.arith;
10881 /* Slightly penalize a narrow operation as the result may
10882 need widening. */
10883 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10884 return true;
10885 }
10886
10887 /* Slightly penalize a narrow operation as the result may
10888 need widening. */
10889 *cost += 1;
10890 if (speed_p)
10891 *cost += extra_cost->alu.arith;
10892
10893 return false;
10894 }
10895
10896 if (mode == SImode)
10897 {
10898 rtx shift_op, shift_reg;
10899
10900 if (TARGET_INT_SIMD
10901 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10902 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10903 {
10904 /* UXTA[BH] or SXTA[BH]. */
10905 if (speed_p)
10906 *cost += extra_cost->alu.extend_arith;
10907 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10908 0, speed_p)
10909 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10910 return true;
10911 }
10912
10913 rtx op0 = XEXP (x, 0);
10914 rtx op1 = XEXP (x, 1);
10915
10916 /* Handle a side effect of adding in the carry to an addition. */
10917 if (GET_CODE (op0) == PLUS
10918 && arm_carry_operation (op1, mode))
10919 {
10920 op1 = XEXP (op0, 1);
10921 op0 = XEXP (op0, 0);
10922 }
10923 else if (GET_CODE (op1) == PLUS
10924 && arm_carry_operation (op0, mode))
10925 {
10926 op0 = XEXP (op1, 0);
10927 op1 = XEXP (op1, 1);
10928 }
10929 else if (GET_CODE (op0) == PLUS)
10930 {
10931 op0 = strip_carry_operation (op0);
10932 if (swap_commutative_operands_p (op0, op1))
10933 std::swap (op0, op1);
10934 }
10935
10936 if (arm_carry_operation (op0, mode))
10937 {
10938 /* Adding the carry to a register is a canonicalization of
10939 adding 0 to the register plus the carry. */
10940 if (speed_p)
10941 *cost += extra_cost->alu.arith;
10942 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10943 return true;
10944 }
10945
10946 shift_reg = NULL;
10947 shift_op = shifter_op_p (op0, &shift_reg);
10948 if (shift_op != NULL)
10949 {
10950 if (shift_reg)
10951 {
10952 if (speed_p)
10953 *cost += extra_cost->alu.arith_shift_reg;
10954 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10955 }
10956 else if (speed_p)
10957 *cost += extra_cost->alu.arith_shift;
10958
10959 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10960 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10961 return true;
10962 }
10963
10964 if (GET_CODE (op0) == MULT)
10965 {
10966 rtx mul_op = op0;
10967
10968 if (TARGET_DSP_MULTIPLY
10969 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10970 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10971 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10972 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10973 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10974 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10975 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10976 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10977 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10978 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10979 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10980 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10981 == 16))))))
10982 {
10983 /* SMLA[BT][BT]. */
10984 if (speed_p)
10985 *cost += extra_cost->mult[0].extend_add;
10986 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10987 SIGN_EXTEND, 0, speed_p)
10988 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10989 SIGN_EXTEND, 0, speed_p)
10990 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10991 return true;
10992 }
10993
10994 if (speed_p)
10995 *cost += extra_cost->mult[0].add;
10996 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10997 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10998 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10999 return true;
11000 }
11001
11002 if (CONST_INT_P (op1))
11003 {
11004 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11005 INTVAL (op1), NULL_RTX,
11006 NULL_RTX, 1, 0);
11007 *cost = COSTS_N_INSNS (insns);
11008 if (speed_p)
11009 *cost += insns * extra_cost->alu.arith;
11010 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11011 return true;
11012 }
11013
11014 if (speed_p)
11015 *cost += extra_cost->alu.arith;
11016
11017 /* Don't recurse here because we want to test the operands
11018 without any carry operation. */
11019 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11020 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11021 return true;
11022 }
11023
11024 if (mode == DImode)
11025 {
11026 if (GET_CODE (XEXP (x, 0)) == MULT
11027 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11028 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11029 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11030 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11031 {
11032 if (speed_p)
11033 *cost += extra_cost->mult[1].extend_add;
11034 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11035 ZERO_EXTEND, 0, speed_p)
11036 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11037 ZERO_EXTEND, 0, speed_p)
11038 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11039 return true;
11040 }
11041
11042 *cost += COSTS_N_INSNS (1);
11043
11044 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11045 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11046 {
11047 if (speed_p)
11048 *cost += (extra_cost->alu.arith
11049 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11050 ? extra_cost->alu.arith
11051 : extra_cost->alu.arith_shift));
11052
11053 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11054 0, speed_p)
11055 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11056 return true;
11057 }
11058
11059 if (speed_p)
11060 *cost += 2 * extra_cost->alu.arith;
11061 return false;
11062 }
11063
11064 /* Vector mode? */
11065 *cost = LIBCALL_COST (2);
11066 return false;
11067 case IOR:
11068 {
11069 rtx sub0, sub1;
11070 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11071 {
11072 if (speed_p)
11073 *cost += extra_cost->alu.rev;
11074
11075 return true;
11076 }
11077 else if (mode == SImode && arm_arch_thumb2
11078 && arm_bfi_p (x, &sub0, &sub1))
11079 {
11080 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11081 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11082 if (speed_p)
11083 *cost += extra_cost->alu.bfi;
11084
11085 return true;
11086 }
11087 }
11088
11089 /* Fall through. */
11090 case AND: case XOR:
11091 if (mode == SImode)
11092 {
11093 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11094 rtx op0 = XEXP (x, 0);
11095 rtx shift_op, shift_reg;
11096
11097 if (subcode == NOT
11098 && (code == AND
11099 || (code == IOR && TARGET_THUMB2)))
11100 op0 = XEXP (op0, 0);
11101
11102 shift_reg = NULL;
11103 shift_op = shifter_op_p (op0, &shift_reg);
11104 if (shift_op != NULL)
11105 {
11106 if (shift_reg)
11107 {
11108 if (speed_p)
11109 *cost += extra_cost->alu.log_shift_reg;
11110 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11111 }
11112 else if (speed_p)
11113 *cost += extra_cost->alu.log_shift;
11114
11115 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11116 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11117 return true;
11118 }
11119
11120 if (CONST_INT_P (XEXP (x, 1)))
11121 {
11122 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11123 INTVAL (XEXP (x, 1)), NULL_RTX,
11124 NULL_RTX, 1, 0);
11125
11126 *cost = COSTS_N_INSNS (insns);
11127 if (speed_p)
11128 *cost += insns * extra_cost->alu.logical;
11129 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11130 return true;
11131 }
11132
11133 if (speed_p)
11134 *cost += extra_cost->alu.logical;
11135 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11136 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11137 return true;
11138 }
11139
11140 if (mode == DImode)
11141 {
11142 rtx op0 = XEXP (x, 0);
11143 enum rtx_code subcode = GET_CODE (op0);
11144
11145 *cost += COSTS_N_INSNS (1);
11146
11147 if (subcode == NOT
11148 && (code == AND
11149 || (code == IOR && TARGET_THUMB2)))
11150 op0 = XEXP (op0, 0);
11151
11152 if (GET_CODE (op0) == ZERO_EXTEND)
11153 {
11154 if (speed_p)
11155 *cost += 2 * extra_cost->alu.logical;
11156
11157 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11158 0, speed_p)
11159 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11160 return true;
11161 }
11162 else if (GET_CODE (op0) == SIGN_EXTEND)
11163 {
11164 if (speed_p)
11165 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11166
11167 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11168 0, speed_p)
11169 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11170 return true;
11171 }
11172
11173 if (speed_p)
11174 *cost += 2 * extra_cost->alu.logical;
11175
11176 return true;
11177 }
11178 /* Vector mode? */
11179
11180 *cost = LIBCALL_COST (2);
11181 return false;
11182
11183 case MULT:
11184 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11185 && (mode == SFmode || !TARGET_VFP_SINGLE))
11186 {
11187 rtx op0 = XEXP (x, 0);
11188
11189 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11190 op0 = XEXP (op0, 0);
11191
11192 if (speed_p)
11193 *cost += extra_cost->fp[mode != SFmode].mult;
11194
11195 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11196 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11197 return true;
11198 }
11199 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11200 {
11201 *cost = LIBCALL_COST (2);
11202 return false;
11203 }
11204
11205 if (mode == SImode)
11206 {
11207 if (TARGET_DSP_MULTIPLY
11208 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11209 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11210 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11211 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11212 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11213 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11214 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11215 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11216 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11217 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11218 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11219 && (INTVAL (XEXP (XEXP (x, 1), 1))
11220 == 16))))))
11221 {
11222 /* SMUL[TB][TB]. */
11223 if (speed_p)
11224 *cost += extra_cost->mult[0].extend;
11225 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11226 SIGN_EXTEND, 0, speed_p);
11227 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11228 SIGN_EXTEND, 1, speed_p);
11229 return true;
11230 }
11231 if (speed_p)
11232 *cost += extra_cost->mult[0].simple;
11233 return false;
11234 }
11235
11236 if (mode == DImode)
11237 {
11238 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11239 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11240 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11241 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11242 {
11243 if (speed_p)
11244 *cost += extra_cost->mult[1].extend;
11245 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11246 ZERO_EXTEND, 0, speed_p)
11247 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11248 ZERO_EXTEND, 0, speed_p));
11249 return true;
11250 }
11251
11252 *cost = LIBCALL_COST (2);
11253 return false;
11254 }
11255
11256 /* Vector mode? */
11257 *cost = LIBCALL_COST (2);
11258 return false;
11259
11260 case NEG:
11261 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11262 && (mode == SFmode || !TARGET_VFP_SINGLE))
11263 {
11264 if (GET_CODE (XEXP (x, 0)) == MULT)
11265 {
11266 /* VNMUL. */
11267 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11268 return true;
11269 }
11270
11271 if (speed_p)
11272 *cost += extra_cost->fp[mode != SFmode].neg;
11273
11274 return false;
11275 }
11276 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11277 {
11278 *cost = LIBCALL_COST (1);
11279 return false;
11280 }
11281
11282 if (mode == SImode)
11283 {
11284 if (GET_CODE (XEXP (x, 0)) == ABS)
11285 {
11286 *cost += COSTS_N_INSNS (1);
11287 /* Assume the non-flag-changing variant. */
11288 if (speed_p)
11289 *cost += (extra_cost->alu.log_shift
11290 + extra_cost->alu.arith_shift);
11291 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11292 return true;
11293 }
11294
11295 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11296 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11297 {
11298 *cost += COSTS_N_INSNS (1);
11299 /* No extra cost for MOV imm and MVN imm. */
11300 /* If the comparison op is using the flags, there's no further
11301 cost, otherwise we need to add the cost of the comparison. */
11302 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11303 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11304 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11305 {
11306 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11307 *cost += (COSTS_N_INSNS (1)
11308 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11309 0, speed_p)
11310 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11311 1, speed_p));
11312 if (speed_p)
11313 *cost += extra_cost->alu.arith;
11314 }
11315 return true;
11316 }
11317
11318 if (speed_p)
11319 *cost += extra_cost->alu.arith;
11320 return false;
11321 }
11322
11323 if (GET_MODE_CLASS (mode) == MODE_INT
11324 && GET_MODE_SIZE (mode) < 4)
11325 {
11326 /* Slightly disparage, as we might need an extend operation. */
11327 *cost += 1;
11328 if (speed_p)
11329 *cost += extra_cost->alu.arith;
11330 return false;
11331 }
11332
11333 if (mode == DImode)
11334 {
11335 *cost += COSTS_N_INSNS (1);
11336 if (speed_p)
11337 *cost += 2 * extra_cost->alu.arith;
11338 return false;
11339 }
11340
11341 /* Vector mode? */
11342 *cost = LIBCALL_COST (1);
11343 return false;
11344
11345 case NOT:
11346 if (mode == SImode)
11347 {
11348 rtx shift_op;
11349 rtx shift_reg = NULL;
11350
11351 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11352
11353 if (shift_op)
11354 {
11355 if (shift_reg != NULL)
11356 {
11357 if (speed_p)
11358 *cost += extra_cost->alu.log_shift_reg;
11359 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11360 }
11361 else if (speed_p)
11362 *cost += extra_cost->alu.log_shift;
11363 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11364 return true;
11365 }
11366
11367 if (speed_p)
11368 *cost += extra_cost->alu.logical;
11369 return false;
11370 }
11371 if (mode == DImode)
11372 {
11373 *cost += COSTS_N_INSNS (1);
11374 return false;
11375 }
11376
11377 /* Vector mode? */
11378
11379 *cost += LIBCALL_COST (1);
11380 return false;
11381
11382 case IF_THEN_ELSE:
11383 {
11384 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11385 {
11386 *cost += COSTS_N_INSNS (3);
11387 return true;
11388 }
11389 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11390 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11391
11392 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11393 /* Assume that if one arm of the if_then_else is a register,
11394 it will be tied with the result, eliminating the
11395 conditional insn. */
11396 if (REG_P (XEXP (x, 1)))
11397 *cost += op2cost;
11398 else if (REG_P (XEXP (x, 2)))
11399 *cost += op1cost;
11400 else
11401 {
11402 if (speed_p)
11403 {
11404 if (extra_cost->alu.non_exec_costs_exec)
11405 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11406 else
11407 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11408 }
11409 else
11410 *cost += op1cost + op2cost;
11411 }
11412 }
11413 return true;
11414
11415 case COMPARE:
11416 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11417 *cost = 0;
11418 else
11419 {
11420 machine_mode op0mode;
11421 /* We'll mostly assume that the cost of a compare is the cost of the
11422 LHS. However, there are some notable exceptions. */
11423
11424 /* Floating point compares are never done as side-effects. */
11425 op0mode = GET_MODE (XEXP (x, 0));
11426 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11427 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11428 {
11429 if (speed_p)
11430 *cost += extra_cost->fp[op0mode != SFmode].compare;
11431
11432 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11433 {
11434 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11435 return true;
11436 }
11437
11438 return false;
11439 }
11440 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11441 {
11442 *cost = LIBCALL_COST (2);
11443 return false;
11444 }
11445
11446 /* DImode compares normally take two insns. */
11447 if (op0mode == DImode)
11448 {
11449 *cost += COSTS_N_INSNS (1);
11450 if (speed_p)
11451 *cost += 2 * extra_cost->alu.arith;
11452 return false;
11453 }
11454
11455 if (op0mode == SImode)
11456 {
11457 rtx shift_op;
11458 rtx shift_reg;
11459
11460 if (XEXP (x, 1) == const0_rtx
11461 && !(REG_P (XEXP (x, 0))
11462 || (GET_CODE (XEXP (x, 0)) == SUBREG
11463 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11464 {
11465 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11466
11467 /* Multiply operations that set the flags are often
11468 significantly more expensive. */
11469 if (speed_p
11470 && GET_CODE (XEXP (x, 0)) == MULT
11471 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11472 *cost += extra_cost->mult[0].flag_setting;
11473
11474 if (speed_p
11475 && GET_CODE (XEXP (x, 0)) == PLUS
11476 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11477 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11478 0), 1), mode))
11479 *cost += extra_cost->mult[0].flag_setting;
11480 return true;
11481 }
11482
11483 shift_reg = NULL;
11484 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11485 if (shift_op != NULL)
11486 {
11487 if (shift_reg != NULL)
11488 {
11489 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11490 1, speed_p);
11491 if (speed_p)
11492 *cost += extra_cost->alu.arith_shift_reg;
11493 }
11494 else if (speed_p)
11495 *cost += extra_cost->alu.arith_shift;
11496 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11497 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11498 return true;
11499 }
11500
11501 if (speed_p)
11502 *cost += extra_cost->alu.arith;
11503 if (CONST_INT_P (XEXP (x, 1))
11504 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11505 {
11506 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11507 return true;
11508 }
11509 return false;
11510 }
11511
11512 /* Vector mode? */
11513
11514 *cost = LIBCALL_COST (2);
11515 return false;
11516 }
11517 return true;
11518
11519 case EQ:
11520 case GE:
11521 case GT:
11522 case LE:
11523 case LT:
11524 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11525 vcle and vclt). */
11526 if (TARGET_NEON
11527 && TARGET_HARD_FLOAT
11528 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11529 && (XEXP (x, 1) == CONST0_RTX (mode)))
11530 {
11531 *cost = 0;
11532 return true;
11533 }
11534
11535 /* Fall through. */
11536 case NE:
11537 case LTU:
11538 case LEU:
11539 case GEU:
11540 case GTU:
11541 case ORDERED:
11542 case UNORDERED:
11543 case UNEQ:
11544 case UNLE:
11545 case UNLT:
11546 case UNGE:
11547 case UNGT:
11548 case LTGT:
11549 if (outer_code == SET)
11550 {
11551 /* Is it a store-flag operation? */
11552 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11553 && XEXP (x, 1) == const0_rtx)
11554 {
11555 /* Thumb also needs an IT insn. */
11556 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11557 return true;
11558 }
11559 if (XEXP (x, 1) == const0_rtx)
11560 {
11561 switch (code)
11562 {
11563 case LT:
11564 /* LSR Rd, Rn, #31. */
11565 if (speed_p)
11566 *cost += extra_cost->alu.shift;
11567 break;
11568
11569 case EQ:
11570 /* RSBS T1, Rn, #0
11571 ADC Rd, Rn, T1. */
11572
11573 case NE:
11574 /* SUBS T1, Rn, #1
11575 SBC Rd, Rn, T1. */
11576 *cost += COSTS_N_INSNS (1);
11577 break;
11578
11579 case LE:
11580 /* RSBS T1, Rn, Rn, LSR #31
11581 ADC Rd, Rn, T1. */
11582 *cost += COSTS_N_INSNS (1);
11583 if (speed_p)
11584 *cost += extra_cost->alu.arith_shift;
11585 break;
11586
11587 case GT:
11588 /* RSB Rd, Rn, Rn, ASR #1
11589 LSR Rd, Rd, #31. */
11590 *cost += COSTS_N_INSNS (1);
11591 if (speed_p)
11592 *cost += (extra_cost->alu.arith_shift
11593 + extra_cost->alu.shift);
11594 break;
11595
11596 case GE:
11597 /* ASR Rd, Rn, #31
11598 ADD Rd, Rn, #1. */
11599 *cost += COSTS_N_INSNS (1);
11600 if (speed_p)
11601 *cost += extra_cost->alu.shift;
11602 break;
11603
11604 default:
11605 /* Remaining cases are either meaningless or would take
11606 three insns anyway. */
11607 *cost = COSTS_N_INSNS (3);
11608 break;
11609 }
11610 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11611 return true;
11612 }
11613 else
11614 {
11615 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11616 if (CONST_INT_P (XEXP (x, 1))
11617 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11618 {
11619 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11620 return true;
11621 }
11622
11623 return false;
11624 }
11625 }
11626 /* Not directly inside a set. If it involves the condition code
11627 register, it must be the condition for a branch, cond_exec or
11628 I_T_E operation. Since the comparison is performed elsewhere,
11629 this is just the control part, which has no additional
11630 cost. */
11631 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11632 && XEXP (x, 1) == const0_rtx)
11633 {
11634 *cost = 0;
11635 return true;
11636 }
11637 return false;
11638
11639 case ABS:
11640 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11641 && (mode == SFmode || !TARGET_VFP_SINGLE))
11642 {
11643 if (speed_p)
11644 *cost += extra_cost->fp[mode != SFmode].neg;
11645
11646 return false;
11647 }
11648 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11649 {
11650 *cost = LIBCALL_COST (1);
11651 return false;
11652 }
11653
11654 if (mode == SImode)
11655 {
11656 if (speed_p)
11657 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11658 return false;
11659 }
11660 /* Vector mode? */
11661 *cost = LIBCALL_COST (1);
11662 return false;
11663
11664 case SIGN_EXTEND:
11665 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11666 && MEM_P (XEXP (x, 0)))
11667 {
11668 if (mode == DImode)
11669 *cost += COSTS_N_INSNS (1);
11670
11671 if (!speed_p)
11672 return true;
11673
11674 if (GET_MODE (XEXP (x, 0)) == SImode)
11675 *cost += extra_cost->ldst.load;
11676 else
11677 *cost += extra_cost->ldst.load_sign_extend;
11678
11679 if (mode == DImode)
11680 *cost += extra_cost->alu.shift;
11681
11682 return true;
11683 }
11684
11685 /* Widening from less than 32 bits requires an extend operation. */
11686 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11687 {
11688 /* We have SXTB/SXTH. */
11689 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11690 if (speed_p)
11691 *cost += extra_cost->alu.extend;
11692 }
11693 else if (GET_MODE (XEXP (x, 0)) != SImode)
11694 {
11695 /* Needs two shifts. */
11696 *cost += COSTS_N_INSNS (1);
11697 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11698 if (speed_p)
11699 *cost += 2 * extra_cost->alu.shift;
11700 }
11701
11702 /* Widening beyond 32 bits requires one more insn. */
11703 if (mode == DImode)
11704 {
11705 *cost += COSTS_N_INSNS (1);
11706 if (speed_p)
11707 *cost += extra_cost->alu.shift;
11708 }
11709
11710 return true;
11711
11712 case ZERO_EXTEND:
11713 if ((arm_arch4
11714 || GET_MODE (XEXP (x, 0)) == SImode
11715 || GET_MODE (XEXP (x, 0)) == QImode)
11716 && MEM_P (XEXP (x, 0)))
11717 {
11718 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11719
11720 if (mode == DImode)
11721 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11722
11723 return true;
11724 }
11725
11726 /* Widening from less than 32 bits requires an extend operation. */
11727 if (GET_MODE (XEXP (x, 0)) == QImode)
11728 {
11729 /* UXTB can be a shorter instruction in Thumb2, but it might
11730 be slower than the AND Rd, Rn, #255 alternative. When
11731 optimizing for speed it should never be slower to use
11732 AND, and we don't really model 16-bit vs 32-bit insns
11733 here. */
11734 if (speed_p)
11735 *cost += extra_cost->alu.logical;
11736 }
11737 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11738 {
11739 /* We have UXTB/UXTH. */
11740 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11741 if (speed_p)
11742 *cost += extra_cost->alu.extend;
11743 }
11744 else if (GET_MODE (XEXP (x, 0)) != SImode)
11745 {
11746 /* Needs two shifts. It's marginally preferable to use
11747 shifts rather than two BIC instructions as the second
11748 shift may merge with a subsequent insn as a shifter
11749 op. */
11750 *cost = COSTS_N_INSNS (2);
11751 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11752 if (speed_p)
11753 *cost += 2 * extra_cost->alu.shift;
11754 }
11755
11756 /* Widening beyond 32 bits requires one more insn. */
11757 if (mode == DImode)
11758 {
11759 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11760 }
11761
11762 return true;
11763
11764 case CONST_INT:
11765 *cost = 0;
11766 /* CONST_INT has no mode, so we cannot tell for sure how many
11767 insns are really going to be needed. The best we can do is
11768 look at the value passed. If it fits in SImode, then assume
11769 that's the mode it will be used for. Otherwise assume it
11770 will be used in DImode. */
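/* Illustration: a value such as 0x1ffffffff does not fit in SImode, so it
   is treated as DImode and costed below as two SImode constants, one for
   each 32-bit half. */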
11771 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11772 mode = SImode;
11773 else
11774 mode = DImode;
11775
11776 /* Avoid blowing up in arm_gen_constant (). */
11777 if (!(outer_code == PLUS
11778 || outer_code == AND
11779 || outer_code == IOR
11780 || outer_code == XOR
11781 || outer_code == MINUS))
11782 outer_code = SET;
11783
11784 const_int_cost:
11785 if (mode == SImode)
11786 {
11787 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11788 INTVAL (x), NULL, NULL,
11789 0, 0));
11790 /* Extra costs? */
11791 }
11792 else
11793 {
11794 *cost += COSTS_N_INSNS (arm_gen_constant
11795 (outer_code, SImode, NULL,
11796 trunc_int_for_mode (INTVAL (x), SImode),
11797 NULL, NULL, 0, 0)
11798 + arm_gen_constant (outer_code, SImode, NULL,
11799 INTVAL (x) >> 32, NULL,
11800 NULL, 0, 0));
11801 /* Extra costs? */
11802 }
11803
11804 return true;
11805
11806 case CONST:
11807 case LABEL_REF:
11808 case SYMBOL_REF:
11809 if (speed_p)
11810 {
11811 if (arm_arch_thumb2 && !flag_pic)
11812 *cost += COSTS_N_INSNS (1);
11813 else
11814 *cost += extra_cost->ldst.load;
11815 }
11816 else
11817 *cost += COSTS_N_INSNS (1);
11818
11819 if (flag_pic)
11820 {
11821 *cost += COSTS_N_INSNS (1);
11822 if (speed_p)
11823 *cost += extra_cost->alu.arith;
11824 }
11825
11826 return true;
11827
11828 case CONST_FIXED:
11829 *cost = COSTS_N_INSNS (4);
11830 /* Fixme. */
11831 return true;
11832
11833 case CONST_DOUBLE:
11834 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11835 && (mode == SFmode || !TARGET_VFP_SINGLE))
11836 {
11837 if (vfp3_const_double_rtx (x))
11838 {
11839 if (speed_p)
11840 *cost += extra_cost->fp[mode == DFmode].fpconst;
11841 return true;
11842 }
11843
11844 if (speed_p)
11845 {
11846 if (mode == DFmode)
11847 *cost += extra_cost->ldst.loadd;
11848 else
11849 *cost += extra_cost->ldst.loadf;
11850 }
11851 else
11852 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11853
11854 return true;
11855 }
11856 *cost = COSTS_N_INSNS (4);
11857 return true;
11858
11859 case CONST_VECTOR:
11860 /* Fixme. */
11861 if (((TARGET_NEON && TARGET_HARD_FLOAT
11862 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11863 || TARGET_HAVE_MVE)
11864 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11865 *cost = COSTS_N_INSNS (1);
11866 else
11867 *cost = COSTS_N_INSNS (4);
11868 return true;
11869
11870 case HIGH:
11871 case LO_SUM:
11872 /* When optimizing for size, we prefer constant pool entries to
11873 MOVW/MOVT pairs, so bump the cost of these slightly. */
11874 if (!speed_p)
11875 *cost += 1;
11876 return true;
11877
11878 case CLZ:
11879 if (speed_p)
11880 *cost += extra_cost->alu.clz;
11881 return false;
11882
11883 case SMIN:
11884 if (XEXP (x, 1) == const0_rtx)
11885 {
11886 if (speed_p)
11887 *cost += extra_cost->alu.log_shift;
11888 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11889 return true;
11890 }
11891 /* Fall through. */
11892 case SMAX:
11893 case UMIN:
11894 case UMAX:
11895 *cost += COSTS_N_INSNS (1);
11896 return false;
11897
11898 case TRUNCATE:
11899 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11900 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11901 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11902 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11903 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11904 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11905 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11906 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11907 == ZERO_EXTEND))))
11908 {
11909 if (speed_p)
11910 *cost += extra_cost->mult[1].extend;
11911 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11912 ZERO_EXTEND, 0, speed_p)
11913 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11914 ZERO_EXTEND, 0, speed_p));
11915 return true;
11916 }
11917 *cost = LIBCALL_COST (1);
11918 return false;
11919
11920 case UNSPEC_VOLATILE:
11921 case UNSPEC:
11922 return arm_unspec_cost (x, outer_code, speed_p, cost);
11923
11924 case PC:
11925 /* Reading the PC is like reading any other register. Writing it
11926 is more expensive, but we take that into account elsewhere. */
11927 *cost = 0;
11928 return true;
11929
11930 case ZERO_EXTRACT:
11931 /* TODO: Simple zero_extract of bottom bits using AND. */
11932 /* Fall through. */
11933 case SIGN_EXTRACT:
11934 if (arm_arch6
11935 && mode == SImode
11936 && CONST_INT_P (XEXP (x, 1))
11937 && CONST_INT_P (XEXP (x, 2)))
11938 {
11939 if (speed_p)
11940 *cost += extra_cost->alu.bfx;
11941 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11942 return true;
11943 }
11944 /* Without UBFX/SBFX, need to resort to shift operations. */
11945 *cost += COSTS_N_INSNS (1);
11946 if (speed_p)
11947 *cost += 2 * extra_cost->alu.shift;
11948 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11949 return true;
11950
11951 case FLOAT_EXTEND:
11952 if (TARGET_HARD_FLOAT)
11953 {
11954 if (speed_p)
11955 *cost += extra_cost->fp[mode == DFmode].widen;
11956 if (!TARGET_VFP5
11957 && GET_MODE (XEXP (x, 0)) == HFmode)
11958 {
11959 /* Pre v8, widening HF->DF is a two-step process, first
11960 widening to SFmode. */
11961 *cost += COSTS_N_INSNS (1);
11962 if (speed_p)
11963 *cost += extra_cost->fp[0].widen;
11964 }
11965 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11966 return true;
11967 }
11968
11969 *cost = LIBCALL_COST (1);
11970 return false;
11971
11972 case FLOAT_TRUNCATE:
11973 if (TARGET_HARD_FLOAT)
11974 {
11975 if (speed_p)
11976 *cost += extra_cost->fp[mode == DFmode].narrow;
11977 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11978 return true;
11979 /* Vector modes? */
11980 }
11981 *cost = LIBCALL_COST (1);
11982 return false;
11983
11984 case FMA:
11985 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11986 {
11987 rtx op0 = XEXP (x, 0);
11988 rtx op1 = XEXP (x, 1);
11989 rtx op2 = XEXP (x, 2);
11990
11991
11992 /* vfms or vfnma. */
11993 if (GET_CODE (op0) == NEG)
11994 op0 = XEXP (op0, 0);
11995
11996 /* vfnms or vfnma. */
11997 if (GET_CODE (op2) == NEG)
11998 op2 = XEXP (op2, 0);
11999
12000 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
12001 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
12002 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12003
12004 if (speed_p)
12005 *cost += extra_cost->fp[mode == DFmode].fma;
12006
12007 return true;
12008 }
12009
12010 *cost = LIBCALL_COST (3);
12011 return false;
12012
12013 case FIX:
12014 case UNSIGNED_FIX:
12015 if (TARGET_HARD_FLOAT)
12016 {
12017 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12018 a vcvt fixed-point conversion. */
12019 if (code == FIX && mode == SImode
12020 && GET_CODE (XEXP (x, 0)) == FIX
12021 && GET_MODE (XEXP (x, 0)) == SFmode
12022 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12023 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12024 > 0)
12025 {
12026 if (speed_p)
12027 *cost += extra_cost->fp[0].toint;
12028
12029 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12030 code, 0, speed_p);
12031 return true;
12032 }
12033
12034 if (GET_MODE_CLASS (mode) == MODE_INT)
12035 {
12036 mode = GET_MODE (XEXP (x, 0));
12037 if (speed_p)
12038 *cost += extra_cost->fp[mode == DFmode].toint;
12039 /* Strip off the 'cost' of rounding towards zero. */
12040 if (GET_CODE (XEXP (x, 0)) == FIX)
12041 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12042 0, speed_p);
12043 else
12044 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12045 /* ??? Increase the cost to deal with transferring from
12046 FP -> CORE registers? */
12047 return true;
12048 }
12049 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12050 && TARGET_VFP5)
12051 {
12052 if (speed_p)
12053 *cost += extra_cost->fp[mode == DFmode].roundint;
12054 return false;
12055 }
12056 /* Vector costs? */
12057 }
12058 *cost = LIBCALL_COST (1);
12059 return false;
12060
12061 case FLOAT:
12062 case UNSIGNED_FLOAT:
12063 if (TARGET_HARD_FLOAT)
12064 {
12065 /* ??? Increase the cost to deal with transferring from CORE
12066 -> FP registers? */
12067 if (speed_p)
12068 *cost += extra_cost->fp[mode == DFmode].fromint;
12069 return false;
12070 }
12071 *cost = LIBCALL_COST (1);
12072 return false;
12073
12074 case CALL:
12075 return true;
12076
12077 case ASM_OPERANDS:
12078 {
12079 /* Just a guess: count the number of instructions in the asm template
12080 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12081 though (see PR60663). */
12082 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12083 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12084
12085 *cost = COSTS_N_INSNS (asm_length + num_operands);
12086 return true;
12087 }
12088 default:
12089 if (mode != VOIDmode)
12090 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12091 else
12092 *cost = COSTS_N_INSNS (4); /* Who knows? */
12093 return false;
12094 }
12095 }
12096
12097 #undef HANDLE_NARROW_SHIFT_ARITH
12098
12099 /* RTX costs entry point. */
12100
12101 static bool
12102 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12103 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12104 {
12105 bool result;
12106 int code = GET_CODE (x);
12107 gcc_assert (current_tune->insn_extra_cost);
12108
12109 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12110 (enum rtx_code) outer_code,
12111 current_tune->insn_extra_cost,
12112 total, speed);
12113
12114 if (dump_file && arm_verbose_cost)
12115 {
12116 print_rtl_single (dump_file, x);
12117 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12118 *total, result ? "final" : "partial");
12119 }
12120 return result;
12121 }
12122
12123 static int
12124 arm_insn_cost (rtx_insn *insn, bool speed)
12125 {
12126 int cost;
12127
12128 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12129 will likely disappear during register allocation. */
12130 if (!reload_completed
12131 && GET_CODE (PATTERN (insn)) == SET
12132 && REG_P (SET_DEST (PATTERN (insn)))
12133 && REG_P (SET_SRC (PATTERN (insn))))
12134 return 2;
12135 cost = pattern_cost (PATTERN (insn), speed);
12136 /* If the cost is zero, then it's likely a complex insn. We don't want the
12137 cost of these to be less than something we know about. */
12138 return cost ? cost : COSTS_N_INSNS (2);
12139 }
12140
12141 /* All address computations that can be done are free, but rtx cost returns
12142 the same for practically all of them. So we weight the different types
12143 of address here in the order (most preferred first):
12144 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
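/* For example, (post_inc (reg)) is costed 0, reg plus a constant offset 2,
   and a bare SYMBOL_REF 10, per the weights below. */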
12145 static inline int
12146 arm_arm_address_cost (rtx x)
12147 {
12148 enum rtx_code c = GET_CODE (x);
12149
12150 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12151 return 0;
12152 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12153 return 10;
12154
12155 if (c == PLUS)
12156 {
12157 if (CONST_INT_P (XEXP (x, 1)))
12158 return 2;
12159
12160 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12161 return 3;
12162
12163 return 4;
12164 }
12165
12166 return 6;
12167 }
12168
12169 static inline int
12170 arm_thumb_address_cost (rtx x)
12171 {
12172 enum rtx_code c = GET_CODE (x);
12173
12174 if (c == REG)
12175 return 1;
12176 if (c == PLUS
12177 && REG_P (XEXP (x, 0))
12178 && CONST_INT_P (XEXP (x, 1)))
12179 return 1;
12180
12181 return 2;
12182 }
12183
12184 static int
12185 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12186 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12187 {
12188 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12189 }
12190
12191 /* Adjust cost hook for XScale. */
12192 static bool
12193 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12194 int * cost)
12195 {
12196 /* Some true dependencies can have a higher cost depending
12197 on precisely how certain input operands are used. */
12198 if (dep_type == 0
12199 && recog_memoized (insn) >= 0
12200 && recog_memoized (dep) >= 0)
12201 {
12202 int shift_opnum = get_attr_shift (insn);
12203 enum attr_type attr_type = get_attr_type (dep);
12204
12205 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12206 operand for INSN. If we have a shifted input operand and the
12207 instruction we depend on is another ALU instruction, then we may
12208 have to account for an additional stall. */
12209 if (shift_opnum != 0
12210 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12211 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12212 || attr_type == TYPE_ALUS_SHIFT_IMM
12213 || attr_type == TYPE_LOGIC_SHIFT_IMM
12214 || attr_type == TYPE_LOGICS_SHIFT_IMM
12215 || attr_type == TYPE_ALU_SHIFT_REG
12216 || attr_type == TYPE_ALUS_SHIFT_REG
12217 || attr_type == TYPE_LOGIC_SHIFT_REG
12218 || attr_type == TYPE_LOGICS_SHIFT_REG
12219 || attr_type == TYPE_MOV_SHIFT
12220 || attr_type == TYPE_MVN_SHIFT
12221 || attr_type == TYPE_MOV_SHIFT_REG
12222 || attr_type == TYPE_MVN_SHIFT_REG))
12223 {
12224 rtx shifted_operand;
12225 int opno;
12226
12227 /* Get the shifted operand. */
12228 extract_insn (insn);
12229 shifted_operand = recog_data.operand[shift_opnum];
12230
12231 /* Iterate over all the operands in DEP. If we write an operand
12232 that overlaps with SHIFTED_OPERAND, then we have to increase the
12233 cost of this dependency. */
12234 extract_insn (dep);
12235 preprocess_constraints (dep);
12236 for (opno = 0; opno < recog_data.n_operands; opno++)
12237 {
12238 /* We can ignore strict inputs. */
12239 if (recog_data.operand_type[opno] == OP_IN)
12240 continue;
12241
12242 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12243 shifted_operand))
12244 {
12245 *cost = 2;
12246 return false;
12247 }
12248 }
12249 }
12250 }
12251 return true;
12252 }
12253
12254 /* Adjust cost hook for Cortex A9. */
12255 static bool
12256 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12257 int * cost)
12258 {
12259 switch (dep_type)
12260 {
12261 case REG_DEP_ANTI:
12262 *cost = 0;
12263 return false;
12264
12265 case REG_DEP_TRUE:
12266 case REG_DEP_OUTPUT:
12267 if (recog_memoized (insn) >= 0
12268 && recog_memoized (dep) >= 0)
12269 {
12270 if (GET_CODE (PATTERN (insn)) == SET)
12271 {
12272 if (GET_MODE_CLASS
12273 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12274 || GET_MODE_CLASS
12275 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12276 {
12277 enum attr_type attr_type_insn = get_attr_type (insn);
12278 enum attr_type attr_type_dep = get_attr_type (dep);
12279
12280 /* By default all dependencies of the form
12281 s0 = s0 <op> s1
12282 s0 = s0 <op> s2
12283 have an extra latency of 1 cycle because
12284 of the input and output dependency in this
12285 case. However, this gets modeled as a true
12286 dependency, hence all these checks. */
12287 if (REG_P (SET_DEST (PATTERN (insn)))
12288 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12289 {
12290 /* FMACS is a special case where the dependent
12291 instruction can be issued 3 cycles before
12292 the normal latency in case of an output
12293 dependency. */
12294 if ((attr_type_insn == TYPE_FMACS
12295 || attr_type_insn == TYPE_FMACD)
12296 && (attr_type_dep == TYPE_FMACS
12297 || attr_type_dep == TYPE_FMACD))
12298 {
12299 if (dep_type == REG_DEP_OUTPUT)
12300 *cost = insn_default_latency (dep) - 3;
12301 else
12302 *cost = insn_default_latency (dep);
12303 return false;
12304 }
12305 else
12306 {
12307 if (dep_type == REG_DEP_OUTPUT)
12308 *cost = insn_default_latency (dep) + 1;
12309 else
12310 *cost = insn_default_latency (dep);
12311 }
12312 return false;
12313 }
12314 }
12315 }
12316 }
12317 break;
12318
12319 default:
12320 gcc_unreachable ();
12321 }
12322
12323 return true;
12324 }
12325
12326 /* Adjust cost hook for FA726TE. */
12327 static bool
12328 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12329 int * cost)
12330 {
12331 /* For FA726TE, a true dependency on CPSR (i.e. a set-cond insn followed by
12332 a predicated one) has a penalty of 3. */
12333 if (dep_type == REG_DEP_TRUE
12334 && recog_memoized (insn) >= 0
12335 && recog_memoized (dep) >= 0
12336 && get_attr_conds (dep) == CONDS_SET)
12337 {
12338 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12339 if (get_attr_conds (insn) == CONDS_USE
12340 && get_attr_type (insn) != TYPE_BRANCH)
12341 {
12342 *cost = 3;
12343 return false;
12344 }
12345
12346 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12347 || get_attr_conds (insn) == CONDS_USE)
12348 {
12349 *cost = 0;
12350 return false;
12351 }
12352 }
12353
12354 return true;
12355 }
12356
12357 /* Implement TARGET_REGISTER_MOVE_COST.
12358
12359 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12360 typically more expensive than a single memory access. We set
12361 the cost to less than two memory accesses so that floating
12362 point to integer conversion does not go through memory. */
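/* For example, the VFP<->core case below is costed at 15, which is below
   2 * the 32-bit memory move cost of 10 (see arm_memory_move_cost). */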
12363
12364 int
12365 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12366 reg_class_t from, reg_class_t to)
12367 {
12368 if (TARGET_32BIT)
12369 {
12370 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12371 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12372 return 15;
12373 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12374 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12375 return 4;
12376 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12377 return 20;
12378 else
12379 return 2;
12380 }
12381 else
12382 {
12383 if (from == HI_REGS || to == HI_REGS)
12384 return 4;
12385 else
12386 return 2;
12387 }
12388 }
12389
12390 /* Implement TARGET_MEMORY_MOVE_COST. */
12391
12392 int
12393 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12394 bool in ATTRIBUTE_UNUSED)
12395 {
12396 if (TARGET_32BIT)
12397 return 10;
12398 else
12399 {
12400 if (GET_MODE_SIZE (mode) < 4)
12401 return 8;
12402 else
12403 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12404 }
12405 }
12406
12407 /* Vectorizer cost model implementation. */
12408
12409 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12410 static int
12411 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12412 tree vectype,
12413 int misalign ATTRIBUTE_UNUSED)
12414 {
12415 unsigned elements;
12416
12417 switch (type_of_cost)
12418 {
12419 case scalar_stmt:
12420 return current_tune->vec_costs->scalar_stmt_cost;
12421
12422 case scalar_load:
12423 return current_tune->vec_costs->scalar_load_cost;
12424
12425 case scalar_store:
12426 return current_tune->vec_costs->scalar_store_cost;
12427
12428 case vector_stmt:
12429 return current_tune->vec_costs->vec_stmt_cost;
12430
12431 case vector_load:
12432 return current_tune->vec_costs->vec_align_load_cost;
12433
12434 case vector_store:
12435 return current_tune->vec_costs->vec_store_cost;
12436
12437 case vec_to_scalar:
12438 return current_tune->vec_costs->vec_to_scalar_cost;
12439
12440 case scalar_to_vec:
12441 return current_tune->vec_costs->scalar_to_vec_cost;
12442
12443 case unaligned_load:
12444 case vector_gather_load:
12445 return current_tune->vec_costs->vec_unalign_load_cost;
12446
12447 case unaligned_store:
12448 case vector_scatter_store:
12449 return current_tune->vec_costs->vec_unalign_store_cost;
12450
12451 case cond_branch_taken:
12452 return current_tune->vec_costs->cond_taken_branch_cost;
12453
12454 case cond_branch_not_taken:
12455 return current_tune->vec_costs->cond_not_taken_branch_cost;
12456
12457 case vec_perm:
12458 case vec_promote_demote:
12459 return current_tune->vec_costs->vec_stmt_cost;
12460
12461 case vec_construct:
12462 elements = TYPE_VECTOR_SUBPARTS (vectype);
12463 return elements / 2 + 1;
12464
12465 default:
12466 gcc_unreachable ();
12467 }
12468 }
12469
12470 /* Return true if and only if this insn can dual-issue only as older. */
12471 static bool
12472 cortexa7_older_only (rtx_insn *insn)
12473 {
12474 if (recog_memoized (insn) < 0)
12475 return false;
12476
12477 switch (get_attr_type (insn))
12478 {
12479 case TYPE_ALU_DSP_REG:
12480 case TYPE_ALU_SREG:
12481 case TYPE_ALUS_SREG:
12482 case TYPE_LOGIC_REG:
12483 case TYPE_LOGICS_REG:
12484 case TYPE_ADC_REG:
12485 case TYPE_ADCS_REG:
12486 case TYPE_ADR:
12487 case TYPE_BFM:
12488 case TYPE_REV:
12489 case TYPE_MVN_REG:
12490 case TYPE_SHIFT_IMM:
12491 case TYPE_SHIFT_REG:
12492 case TYPE_LOAD_BYTE:
12493 case TYPE_LOAD_4:
12494 case TYPE_STORE_4:
12495 case TYPE_FFARITHS:
12496 case TYPE_FADDS:
12497 case TYPE_FFARITHD:
12498 case TYPE_FADDD:
12499 case TYPE_FMOV:
12500 case TYPE_F_CVT:
12501 case TYPE_FCMPS:
12502 case TYPE_FCMPD:
12503 case TYPE_FCONSTS:
12504 case TYPE_FCONSTD:
12505 case TYPE_FMULS:
12506 case TYPE_FMACS:
12507 case TYPE_FMULD:
12508 case TYPE_FMACD:
12509 case TYPE_FDIVS:
12510 case TYPE_FDIVD:
12511 case TYPE_F_MRC:
12512 case TYPE_F_MRRC:
12513 case TYPE_F_FLAG:
12514 case TYPE_F_LOADS:
12515 case TYPE_F_STORES:
12516 return true;
12517 default:
12518 return false;
12519 }
12520 }
12521
12522 /* Return true if and only if this insn can dual-issue as younger. */
12523 static bool
12524 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12525 {
12526 if (recog_memoized (insn) < 0)
12527 {
12528 if (verbose > 5)
12529 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12530 return false;
12531 }
12532
12533 switch (get_attr_type (insn))
12534 {
12535 case TYPE_ALU_IMM:
12536 case TYPE_ALUS_IMM:
12537 case TYPE_LOGIC_IMM:
12538 case TYPE_LOGICS_IMM:
12539 case TYPE_EXTEND:
12540 case TYPE_MVN_IMM:
12541 case TYPE_MOV_IMM:
12542 case TYPE_MOV_REG:
12543 case TYPE_MOV_SHIFT:
12544 case TYPE_MOV_SHIFT_REG:
12545 case TYPE_BRANCH:
12546 case TYPE_CALL:
12547 return true;
12548 default:
12549 return false;
12550 }
12551 }
12552
12553
12554 /* Look for an instruction that can dual issue only as an older
12555 instruction, and move it in front of any instructions that can
12556 dual-issue as younger, while preserving the relative order of all
12557 other instructions in the ready list. This is a heuristic to help
12558 dual-issue in later cycles, by postponing issue of more flexible
12559 instructions. This heuristic may affect dual issue opportunities
12560 in the current cycle. */
12561 static void
12562 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12563 int *n_readyp, int clock)
12564 {
12565 int i;
12566 int first_older_only = -1, first_younger = -1;
12567
12568 if (verbose > 5)
12569 fprintf (file,
12570 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12571 clock,
12572 *n_readyp);
12573
12574 /* Traverse the ready list from the head (the instruction to issue
12575 first), looking for the first instruction that can issue as
12576 younger and the first instruction that can dual-issue only as
12577 older. */
12578 for (i = *n_readyp - 1; i >= 0; i--)
12579 {
12580 rtx_insn *insn = ready[i];
12581 if (cortexa7_older_only (insn))
12582 {
12583 first_older_only = i;
12584 if (verbose > 5)
12585 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12586 break;
12587 }
12588 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12589 first_younger = i;
12590 }
12591
12592 /* Nothing to reorder because either no younger insn was found, or an insn
12593 that can dual-issue only as older appears before any insn that
12594 can dual-issue as younger. */
12595 if (first_younger == -1)
12596 {
12597 if (verbose > 5)
12598 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12599 return;
12600 }
12601
12602 /* Nothing to reorder because there is no older-only insn in the ready list. */
12603 if (first_older_only == -1)
12604 {
12605 if (verbose > 5)
12606 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12607 return;
12608 }
12609
12610 /* Move first_older_only insn before first_younger. */
12611 if (verbose > 5)
12612 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12613 INSN_UID(ready [first_older_only]),
12614 INSN_UID(ready [first_younger]));
12615 rtx_insn *first_older_only_insn = ready [first_older_only];
12616 for (i = first_older_only; i < first_younger; i++)
12617 {
12618 ready[i] = ready[i+1];
12619 }
12620
12621 ready[i] = first_older_only_insn;
12622 return;
12623 }
12624
12625 /* Implement TARGET_SCHED_REORDER. */
12626 static int
12627 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12628 int clock)
12629 {
12630 switch (arm_tune)
12631 {
12632 case TARGET_CPU_cortexa7:
12633 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12634 break;
12635 default:
12636 /* Do nothing for other cores. */
12637 break;
12638 }
12639
12640 return arm_issue_rate ();
12641 }
12642
12643 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12644 It corrects the value of COST based on the relationship between
12645 INSN and DEP through the dependence type DEP_TYPE. It returns the new
12646 value. There is a per-core adjust_cost hook to adjust scheduler costs
12647 and the per-core hook can choose to completely override the generic
12648 adjust_cost function. Only put bits of code into arm_adjust_cost that
12649 are common across all cores. */
12650 static int
12651 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12652 unsigned int)
12653 {
12654 rtx i_pat, d_pat;
12655
12656 /* When generating Thumb-1 code, we want to place flag-setting operations
12657 close to a conditional branch which depends on them, so that we can
12658 omit the comparison. */
12659 if (TARGET_THUMB1
12660 && dep_type == 0
12661 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12662 && recog_memoized (dep) >= 0
12663 && get_attr_conds (dep) == CONDS_SET)
12664 return 0;
12665
12666 if (current_tune->sched_adjust_cost != NULL)
12667 {
12668 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12669 return cost;
12670 }
12671
12672 /* XXX Is this strictly true? */
12673 if (dep_type == REG_DEP_ANTI
12674 || dep_type == REG_DEP_OUTPUT)
12675 return 0;
12676
12677 /* Call insns don't incur a stall, even if they follow a load. */
12678 if (dep_type == 0
12679 && CALL_P (insn))
12680 return 1;
12681
12682 if ((i_pat = single_set (insn)) != NULL
12683 && MEM_P (SET_SRC (i_pat))
12684 && (d_pat = single_set (dep)) != NULL
12685 && MEM_P (SET_DEST (d_pat)))
12686 {
12687 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12688 /* This is a load after a store; there is no conflict if the load reads
12689 from a cached area. Assume that loads from the stack and from the
12690 constant pool are cached, and that others will miss. This is a
12691 hack. */
12692
12693 if ((SYMBOL_REF_P (src_mem)
12694 && CONSTANT_POOL_ADDRESS_P (src_mem))
12695 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12696 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12697 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12698 return 1;
12699 }
12700
12701 return cost;
12702 }
12703
12704 int
12705 arm_max_conditional_execute (void)
12706 {
12707 return max_insns_skipped;
12708 }
12709
12710 static int
12711 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12712 {
12713 if (TARGET_32BIT)
12714 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12715 else
12716 return (optimize > 0) ? 2 : 0;
12717 }
12718
12719 static int
12720 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12721 {
12722 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12723 }
12724
12725 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12726 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12727 sequences of non-executed instructions in IT blocks probably take the same
12728 amount of time as executed instructions (and the IT instruction itself takes
12729 space in icache). This function was experimentally determined to give good
12730 results on a popular embedded benchmark. */
12731
12732 static int
12733 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12734 {
12735 return (TARGET_32BIT && speed_p) ? 1
12736 : arm_default_branch_cost (speed_p, predictable_p);
12737 }
12738
12739 static int
12740 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12741 {
12742 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12743 }
12744
12745 static bool fp_consts_inited = false;
12746
12747 static REAL_VALUE_TYPE value_fp0;
12748
12749 static void
12750 init_fp_table (void)
12751 {
12752 REAL_VALUE_TYPE r;
12753
12754 r = REAL_VALUE_ATOF ("0", DFmode);
12755 value_fp0 = r;
12756 fp_consts_inited = true;
12757 }
12758
12759 /* Return TRUE if rtx X is a valid immediate FP constant. */
12760 int
12761 arm_const_double_rtx (rtx x)
12762 {
12763 const REAL_VALUE_TYPE *r;
12764
12765 if (!fp_consts_inited)
12766 init_fp_table ();
12767
12768 r = CONST_DOUBLE_REAL_VALUE (x);
12769 if (REAL_VALUE_MINUS_ZERO (*r))
12770 return 0;
12771
12772 if (real_equal (r, &value_fp0))
12773 return 1;
12774
12775 return 0;
12776 }
12777
12778 /* VFPv3 has a fairly wide range of representable immediates, formed from
12779 "quarter-precision" floating-point values. These can be evaluated using this
12780 formula (with ^ for exponentiation):
12781
12782 -1^s * n * 2^-r
12783
12784 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12785 16 <= n <= 31 and 0 <= r <= 7.
12786
12787 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12788
12789 - A (most-significant) is the sign bit.
12790 - BCD are the exponent (encoded as r XOR 3).
12791 - EFGH are the mantissa (encoded as n - 16).
12792 */
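/* For example, 1.0 can be written as -1^0 * 16 * 2^-4 (s = 0, n = 16, r = 4),
   which maps to A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000,
   i.e. the 8-bit encoding 0b01110000 (0x70). */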
12793
12794 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12795 fconst[sd] instruction, or -1 if X isn't suitable. */
12796 static int
12797 vfp3_const_double_index (rtx x)
12798 {
12799 REAL_VALUE_TYPE r, m;
12800 int sign, exponent;
12801 unsigned HOST_WIDE_INT mantissa, mant_hi;
12802 unsigned HOST_WIDE_INT mask;
12803 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12804 bool fail;
12805
12806 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12807 return -1;
12808
12809 r = *CONST_DOUBLE_REAL_VALUE (x);
12810
12811 /* We can't represent these things, so detect them first. */
12812 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12813 return -1;
12814
12815 /* Extract sign, exponent and mantissa. */
12816 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12817 r = real_value_abs (&r);
12818 exponent = REAL_EXP (&r);
12819 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12820 highest (sign) bit, with a fixed binary point at bit point_pos.
12821 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12822 bits for the mantissa, this may fail (low bits would be lost). */
12823 real_ldexp (&m, &r, point_pos - exponent);
12824 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12825 mantissa = w.elt (0);
12826 mant_hi = w.elt (1);
12827
12828 /* If there are bits set in the low part of the mantissa, we can't
12829 represent this value. */
12830 if (mantissa != 0)
12831 return -1;
12832
12833 /* Now make it so that mantissa contains the most-significant bits, and move
12834 the point_pos to indicate that the least-significant bits have been
12835 discarded. */
12836 point_pos -= HOST_BITS_PER_WIDE_INT;
12837 mantissa = mant_hi;
12838
12839 /* We can permit four significant bits of mantissa only, plus a high bit
12840 which is always 1. */
12841 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12842 if ((mantissa & mask) != 0)
12843 return -1;
12844
12845 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12846 mantissa >>= point_pos - 5;
12847
12848 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12849 floating-point immediate zero with Neon using an integer-zero load, but
12850 that case is handled elsewhere.) */
12851 if (mantissa == 0)
12852 return -1;
12853
12854 gcc_assert (mantissa >= 16 && mantissa <= 31);
12855
12856 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12857 normalized significands are in the range [1, 2). (Our mantissa is shifted
12858 left 4 places at this point relative to normalized IEEE754 values). GCC
12859 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12860 REAL_EXP must be altered. */
12861 exponent = 5 - exponent;
12862
12863 if (exponent < 0 || exponent > 7)
12864 return -1;
12865
12866 /* Sign, mantissa and exponent are now in the correct form to plug into the
12867 formula described in the comment above. */
12868 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12869 }
12870
12871 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12872 int
12873 vfp3_const_double_rtx (rtx x)
12874 {
12875 if (!TARGET_VFP3)
12876 return 0;
12877
12878 return vfp3_const_double_index (x) != -1;
12879 }
12880
12881 /* Recognize immediates which can be used in various Neon and MVE instructions.
12882 Legal immediates are described by the following table (for VMVN variants, the
12883 bitwise inverse of the constant shown is recognized. In either case, VMOV
12884 is output and the correct instruction to use for a given constant is chosen
12885 by the assembler). The constant shown is replicated across all elements of
12886 the destination vector.
12887
12888 insn elems variant constant (binary)
12889 ---- ----- ------- -----------------
12890 vmov i32 0 00000000 00000000 00000000 abcdefgh
12891 vmov i32 1 00000000 00000000 abcdefgh 00000000
12892 vmov i32 2 00000000 abcdefgh 00000000 00000000
12893 vmov i32 3 abcdefgh 00000000 00000000 00000000
12894 vmov i16 4 00000000 abcdefgh
12895 vmov i16 5 abcdefgh 00000000
12896 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12897 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12898 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12899 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12900 vmvn i16 10 00000000 abcdefgh
12901 vmvn i16 11 abcdefgh 00000000
12902 vmov i32 12 00000000 00000000 abcdefgh 11111111
12903 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12904 vmov i32 14 00000000 abcdefgh 11111111 11111111
12905 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12906 vmov i8 16 abcdefgh
12907 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12908 eeeeeeee ffffffff gggggggg hhhhhhhh
12909 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12910 vmov f32 19 00000000 00000000 00000000 00000000
12911
12912 For case 18, B = !b. Representable values are exactly those accepted by
12913 vfp3_const_double_index, but are output as floating-point numbers rather
12914 than indices.
12915
12916 For case 19, we will change it to vmov.i32 when assembling.
12917
12918 Variants 0-5 (inclusive) may also be used as immediates for the second
12919 operand of VORR/VBIC instructions.
12920
12921 The INVERSE argument causes the bitwise inverse of the given operand to be
12922 recognized instead (used for recognizing legal immediates for the VAND/VORN
12923 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12924 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12925 output, rather than the real insns vbic/vorr).
12926
12927 INVERSE makes no difference to the recognition of float vectors.
12928
12929 The return value is the variant of immediate as shown in the above table, or
12930 -1 if the given value doesn't match any of the listed patterns.
12931 */
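/* For example, a V4SImode vector with every element equal to 0x0000ab00
   matches variant 1 above: each 32-bit lane splats to the little-endian
   bytes { 00, ab, 00, 00 }, so *MODCONST is set to 0x0000ab00 and
   *ELEMENTWIDTH to 32. */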
12932 static int
12933 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12934 rtx *modconst, int *elementwidth)
12935 {
12936 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12937 matches = 1; \
12938 for (i = 0; i < idx; i += (STRIDE)) \
12939 if (!(TEST)) \
12940 matches = 0; \
12941 if (matches) \
12942 { \
12943 immtype = (CLASS); \
12944 elsize = (ELSIZE); \
12945 break; \
12946 }
12947
12948 unsigned int i, elsize = 0, idx = 0, n_elts;
12949 unsigned int innersize;
12950 unsigned char bytes[16] = {};
12951 int immtype = -1, matches;
12952 unsigned int invmask = inverse ? 0xff : 0;
12953 bool vector = GET_CODE (op) == CONST_VECTOR;
12954
12955 if (vector)
12956 n_elts = CONST_VECTOR_NUNITS (op);
12957 else
12958 {
12959 n_elts = 1;
12960 gcc_assert (mode != VOIDmode);
12961 }
12962
12963 innersize = GET_MODE_UNIT_SIZE (mode);
12964
12965 /* Only support 128-bit vectors for MVE. */
12966 if (TARGET_HAVE_MVE
12967 && (!vector
12968 || VALID_MVE_PRED_MODE (mode)
12969 || n_elts * innersize != 16))
12970 return -1;
12971
12972 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12973 return -1;
12974
12975 /* Vectors of float constants. */
12976 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12977 {
12978 rtx el0 = CONST_VECTOR_ELT (op, 0);
12979
12980 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12981 return -1;
12982
12983 /* FP16 vectors cannot be represented. */
12984 if (GET_MODE_INNER (mode) == HFmode)
12985 return -1;
12986
12987 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12988 are distinct in this context. */
12989 if (!const_vec_duplicate_p (op))
12990 return -1;
12991
12992 if (modconst)
12993 *modconst = CONST_VECTOR_ELT (op, 0);
12994
12995 if (elementwidth)
12996 *elementwidth = 0;
12997
12998 if (el0 == CONST0_RTX (GET_MODE (el0)))
12999 return 19;
13000 else
13001 return 18;
13002 }
13003
13004 /* The tricks done in the code below apply for little-endian vector layout.
13005 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13006 FIXME: Implement logic for big-endian vectors. */
13007 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13008 return -1;
13009
13010 /* Splat vector constant out into a byte vector. */
13011 for (i = 0; i < n_elts; i++)
13012 {
13013 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13014 unsigned HOST_WIDE_INT elpart;
13015
13016 gcc_assert (CONST_INT_P (el));
13017 elpart = INTVAL (el);
13018
13019 for (unsigned int byte = 0; byte < innersize; byte++)
13020 {
13021 bytes[idx++] = (elpart & 0xff) ^ invmask;
13022 elpart >>= BITS_PER_UNIT;
13023 }
13024 }
13025
13026 /* Sanity check. */
13027 gcc_assert (idx == GET_MODE_SIZE (mode));
13028
13029 do
13030 {
13031 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13032 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13033
13034 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13035 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13036
13037 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13038 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13039
13040 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13041 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13042
13043 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13044
13045 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13046
13047 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13048 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13049
13050 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13051 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13052
13053 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13054 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13055
13056 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13057 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13058
13059 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13060
13061 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13062
13063 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13064 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13065
13066 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13067 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13068
13069 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13070 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13071
13072 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13073 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13074
13075 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13076
13077 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13078 && bytes[i] == bytes[(i + 8) % idx]);
13079 }
13080 while (0);
13081
13082 if (immtype == -1)
13083 return -1;
13084
13085 if (elementwidth)
13086 *elementwidth = elsize;
13087
13088 if (modconst)
13089 {
13090 unsigned HOST_WIDE_INT imm = 0;
13091
13092 /* Un-invert bytes of recognized vector, if necessary. */
13093 if (invmask != 0)
13094 for (i = 0; i < idx; i++)
13095 bytes[i] ^= invmask;
13096
13097 if (immtype == 17)
13098 {
13099 /* FIXME: Broken on 32-bit H_W_I hosts. */
13100 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13101
13102 for (i = 0; i < 8; i++)
13103 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13104 << (i * BITS_PER_UNIT);
13105
13106 *modconst = GEN_INT (imm);
13107 }
13108 else
13109 {
13110 unsigned HOST_WIDE_INT imm = 0;
13111
13112 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13113 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13114
13115 *modconst = GEN_INT (imm);
13116 }
13117 }
13118
13119 return immtype;
13120 #undef CHECK
13121 }
13122
13123 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13124 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13125 (or zero for float elements), and a modified constant (whatever should be
13126 output for a VMOV) in *MODCONST. The function was renamed from
13127 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13128 it is used by both Neon and MVE. */
13129 int
13130 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13131 rtx *modconst, int *elementwidth)
13132 {
13133 rtx tmpconst;
13134 int tmpwidth;
13135 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13136
13137 if (retval == -1)
13138 return 0;
13139
13140 if (modconst)
13141 *modconst = tmpconst;
13142
13143 if (elementwidth)
13144 *elementwidth = tmpwidth;
13145
13146 return 1;
13147 }
13148
13149 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13150 the immediate is valid, write a constant suitable for using as an operand
13151 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13152 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13153
13154 int
13155 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13156 rtx *modconst, int *elementwidth)
13157 {
13158 rtx tmpconst;
13159 int tmpwidth;
13160 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13161
13162 if (retval < 0 || retval > 5)
13163 return 0;
13164
13165 if (modconst)
13166 *modconst = tmpconst;
13167
13168 if (elementwidth)
13169 *elementwidth = tmpwidth;
13170
13171 return 1;
13172 }
13173
13174 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13175 the immediate is valid, write a constant suitable for using as an operand
13176 to VSHR/VSHL to *MODCONST and the corresponding element width to
13177 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
13178 which have different immediate ranges. */
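/* For instance, a V8HImode vector of all 3s is a valid immediate in both
   directions: for 16-bit elements, left shifts accept immediates 0..15 and
   right shifts accept 1..16, and *ELEMENTWIDTH is set to 16 in either case. */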
13179
13180 int
13181 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13182 rtx *modconst, int *elementwidth,
13183 bool isleftshift)
13184 {
13185 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13186 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13187 unsigned HOST_WIDE_INT last_elt = 0;
13188 unsigned HOST_WIDE_INT maxshift;
13189
13190 /* Check that all elements of the vector constant are identical. */
13191 for (i = 0; i < n_elts; i++)
13192 {
13193 rtx el = CONST_VECTOR_ELT (op, i);
13194 unsigned HOST_WIDE_INT elpart;
13195
13196 if (CONST_INT_P (el))
13197 elpart = INTVAL (el);
13198 else if (CONST_DOUBLE_P (el))
13199 return 0;
13200 else
13201 gcc_unreachable ();
13202
13203 if (i != 0 && elpart != last_elt)
13204 return 0;
13205
13206 last_elt = elpart;
13207 }
13208
13209 /* Shift less than element size. */
13210 maxshift = innersize * 8;
13211
13212 if (isleftshift)
13213 {
13214 /* Left shift immediate value can be from 0 to <size>-1. */
13215 if (last_elt >= maxshift)
13216 return 0;
13217 }
13218 else
13219 {
13220 /* Right shift immediate value can be from 1 to <size>. */
13221 if (last_elt == 0 || last_elt > maxshift)
13222 return 0;
13223 }
13224
13225 if (elementwidth)
13226 *elementwidth = innersize * 8;
13227
13228 if (modconst)
13229 *modconst = CONST_VECTOR_ELT (op, 0);
13230
13231 return 1;
13232 }
13233
13234 /* Return a string suitable for output of Neon immediate logic operation
13235 MNEM. */
13236
13237 char *
13238 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13239 int inverse, int quad)
13240 {
13241 int width, is_valid;
13242 static char templ[40];
13243
13244 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13245
13246 gcc_assert (is_valid != 0);
13247
13248 if (quad)
13249 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13250 else
13251 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13252
13253 return templ;
13254 }
13255
13256 /* Return a string suitable for output of Neon immediate shift operation
13257 (VSHR or VSHL) MNEM. */
13258
13259 char *
13260 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13261 machine_mode mode, int quad,
13262 bool isleftshift)
13263 {
13264 int width, is_valid;
13265 static char templ[40];
13266
13267 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13268 gcc_assert (is_valid != 0);
13269
13270 if (quad)
13271 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13272 else
13273 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13274
13275 return templ;
13276 }
13277
13278 /* Output a sequence of pairwise operations to implement a reduction.
13279 NOTE: We do "too much work" here, because pairwise operations work on two
13280 registers-worth of operands in one go. Unfortunately, we don't think we can
13281 exploit those extra calculations to do the full operation in fewer steps.
13282 Although all vector elements of the result but the first are ignored, we
13283 actually calculate the same result in each of the elements. An alternative
13284 such as initially loading a vector with zero to use as each of the second
13285 operands would use up an additional register and take an extra instruction,
13286 for no particular gain. */
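/* For example, with a pairwise-add reduction (e.g. VPADD) over a 4-element
   vector { a, b, c, d }, the first step produces { a+b, c+d, a+b, c+d } in a
   temporary and the second writes a+b+c+d into every element of OP0, of which
   only element 0 is subsequently used. */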
13287
13288 void
13289 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13290 rtx (*reduc) (rtx, rtx, rtx))
13291 {
13292 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13293 rtx tmpsum = op1;
13294
13295 for (i = parts / 2; i >= 1; i /= 2)
13296 {
13297 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13298 emit_insn (reduc (dest, tmpsum, tmpsum));
13299 tmpsum = dest;
13300 }
13301 }
13302
13303 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13304 loaded into a register using VDUP.
13305
13306 If this is the case, and GENERATE is set, we also generate
13307 instructions to do this and return an RTX to assign to the register. */
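/* For instance, a V4SImode vector with all elements equal to 0x12345678
   (which is not a valid VMOV immediate) can be materialised by moving the
   constant into a core register (e.g. with movw/movt) and using vdup.32. */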
13308
13309 static rtx
13310 neon_vdup_constant (rtx vals, bool generate)
13311 {
13312 machine_mode mode = GET_MODE (vals);
13313 machine_mode inner_mode = GET_MODE_INNER (mode);
13314 rtx x;
13315
13316 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13317 return NULL_RTX;
13318
13319 if (!const_vec_duplicate_p (vals, &x))
13320 /* The elements are not all the same. We could handle repeating
13321 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13322 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13323 vdup.i16). */
13324 return NULL_RTX;
13325
13326 if (!generate)
13327 return x;
13328
13329 /* We can load this constant by using VDUP and a constant in a
13330 single ARM register. This will be cheaper than a vector
13331 load. */
13332
13333 x = copy_to_mode_reg (inner_mode, x);
13334 return gen_vec_duplicate (mode, x);
13335 }
13336
13337 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13338 rtx
13339 mve_bool_vec_to_const (rtx const_vec)
13340 {
13341 machine_mode mode = GET_MODE (const_vec);
13342
13343 if (!VECTOR_MODE_P (mode))
13344 return const_vec;
13345
13346 unsigned n_elts = GET_MODE_NUNITS (mode);
13347 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13348 unsigned shift_c = 16 / n_elts;
13349 unsigned i;
13350 int hi_val = 0;
13351
13352 for (i = 0; i < n_elts; i++)
13353 {
13354 rtx el = CONST_VECTOR_ELT (const_vec, i);
13355 unsigned HOST_WIDE_INT elpart;
13356
13357 gcc_assert (CONST_INT_P (el));
13358 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13359
13360 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13361
13362 hi_val |= elpart << (index * shift_c);
13363 }
13364 /* We use a mov immediate to encode this constant, which writes 32 bits,
13365 so we need to make sure the top 16 bits are all 0; otherwise we can't
13366 guarantee that we can actually write this immediate. */
13367 return gen_int_mode (hi_val, SImode);
13368 }
13369
13370 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13371 constants (for vec_init) or a CONST_VECTOR, can be efficiently loaded
13372 into a register.
13373
13374 If this is the case, and GENERATE is set, we also generate code to do
13375 this and return an RTX to copy into the register. */
13376
13377 rtx
13378 neon_make_constant (rtx vals, bool generate)
13379 {
13380 machine_mode mode = GET_MODE (vals);
13381 rtx target;
13382 rtx const_vec = NULL_RTX;
13383 int n_elts = GET_MODE_NUNITS (mode);
13384 int n_const = 0;
13385 int i;
13386
13387 if (GET_CODE (vals) == CONST_VECTOR)
13388 const_vec = vals;
13389 else if (GET_CODE (vals) == PARALLEL)
13390 {
13391 /* A CONST_VECTOR must contain only CONST_INTs and
13392 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13393 Only store valid constants in a CONST_VECTOR. */
13394 for (i = 0; i < n_elts; ++i)
13395 {
13396 rtx x = XVECEXP (vals, 0, i);
13397 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13398 n_const++;
13399 }
13400 if (n_const == n_elts)
13401 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13402 }
13403 else
13404 gcc_unreachable ();
13405
13406 if (const_vec != NULL
13407 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13408 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13409 return const_vec;
13410 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode))
13411 return mve_bool_vec_to_const (const_vec);
13412 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13413 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13414 pipeline cycle; creating the constant takes one or two ARM
13415 pipeline cycles. */
13416 return target;
13417 else if (const_vec != NULL_RTX)
13418 /* Load from constant pool. On Cortex-A8 this takes two cycles
13419 (for either double or quad vectors). We cannot take advantage
13420 of single-cycle VLD1 because we need a PC-relative addressing
13421 mode. */
13422 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13423 else
13424 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13425 We cannot construct an initializer. */
13426 return NULL_RTX;
13427 }
13428
13429 /* Initialize vector TARGET to VALS. */
13430
13431 void
13432 neon_expand_vector_init (rtx target, rtx vals)
13433 {
13434 machine_mode mode = GET_MODE (target);
13435 machine_mode inner_mode = GET_MODE_INNER (mode);
13436 int n_elts = GET_MODE_NUNITS (mode);
13437 int n_var = 0, one_var = -1;
13438 bool all_same = true;
13439 rtx x, mem;
13440 int i;
13441
13442 for (i = 0; i < n_elts; ++i)
13443 {
13444 x = XVECEXP (vals, 0, i);
13445 if (!CONSTANT_P (x))
13446 ++n_var, one_var = i;
13447
13448 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13449 all_same = false;
13450 }
13451
13452 if (n_var == 0)
13453 {
13454 rtx constant = neon_make_constant (vals);
13455 if (constant != NULL_RTX)
13456 {
13457 emit_move_insn (target, constant);
13458 return;
13459 }
13460 }
13461
13462 /* Splat a single non-constant element if we can. */
13463 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13464 {
13465 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13466 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13467 return;
13468 }
13469
13470 /* One field is non-constant. Load constant then overwrite varying
13471 field. This is more efficient than using the stack. */
13472 if (n_var == 1)
13473 {
13474 rtx copy = copy_rtx (vals);
13475 rtx merge_mask = GEN_INT (1 << one_var);
13476
13477 /* Load constant part of vector, substitute neighboring value for
13478 varying element. */
13479 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13480 neon_expand_vector_init (target, copy);
13481
13482 /* Insert variable. */
13483 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13484 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13485 return;
13486 }
13487
13488 /* Construct the vector in memory one field at a time
13489 and load the whole vector. */
13490 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13491 for (i = 0; i < n_elts; i++)
13492 emit_move_insn (adjust_address_nv (mem, inner_mode,
13493 i * GET_MODE_SIZE (inner_mode)),
13494 XVECEXP (vals, 0, i));
13495 emit_move_insn (target, mem);
13496 }
13497
13498 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13499 an error mentioning DESC if it doesn't. EXP indicates the source location,
13500 which includes the inlining history for intrinsics. */
13501
13502 static void
13503 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13504 const_tree exp, const char *desc)
13505 {
13506 HOST_WIDE_INT lane;
13507
13508 gcc_assert (CONST_INT_P (operand));
13509
13510 lane = INTVAL (operand);
13511
13512 if (lane < low || lane >= high)
13513 {
13514 if (exp)
13515 error_at (EXPR_LOCATION (exp),
13516 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13517 else
13518 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13519 }
13520 }
13521
13522 /* Bounds-check lanes. */
13523
13524 void
13525 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13526 const_tree exp)
13527 {
13528 bounds_check (operand, low, high, exp, "lane");
13529 }
13530
13531 /* Bounds-check constants. */
13532
13533 void
13534 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13535 {
13536 bounds_check (operand, low, high, NULL_TREE, "constant");
13537 }
13538
13539 HOST_WIDE_INT
13540 neon_element_bits (machine_mode mode)
13541 {
13542 return GET_MODE_UNIT_BITSIZE (mode);
13543 }
13544
13545 \f
13546 /* Predicates for `match_operand' and `match_operator'. */
13547
13548 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13549 WB level is 2 if full writeback address modes are allowed, 1
13550 if limited writeback address modes (POST_INC and PRE_DEC) are
13551 allowed and 0 if no writeback at all is supported. */
13552
13553 int
13554 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13555 {
13556 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13557 rtx ind;
13558
13559 /* Reject eliminable registers. */
13560 if (! (reload_in_progress || reload_completed || lra_in_progress)
13561 && ( reg_mentioned_p (frame_pointer_rtx, op)
13562 || reg_mentioned_p (arg_pointer_rtx, op)
13563 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13564 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13565 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13566 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13567 return FALSE;
13568
13569 /* Constants are converted into offsets from labels. */
13570 if (!MEM_P (op))
13571 return FALSE;
13572
13573 ind = XEXP (op, 0);
13574
13575 if (reload_completed
13576 && (LABEL_REF_P (ind)
13577 || (GET_CODE (ind) == CONST
13578 && GET_CODE (XEXP (ind, 0)) == PLUS
13579 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13580 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13581 return TRUE;
13582
13583 /* Match: (mem (reg)). */
13584 if (REG_P (ind))
13585 return arm_address_register_rtx_p (ind, 0);
13586
13587 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13588 acceptable whenever any level of writeback is allowed (subject to
13589 verification by arm_address_register_rtx_p), while PRE_INC and
13590 POST_DEC additionally require full writeback to be
13591 accepted. */
13592 if (wb_level > 0
13593 && (GET_CODE (ind) == POST_INC
13594 || GET_CODE (ind) == PRE_DEC
13595 || (wb_level > 1
13596 && (GET_CODE (ind) == PRE_INC
13597 || GET_CODE (ind) == POST_DEC))))
13598 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13599
13600 if (wb_level > 1
13601 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13602 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13603 && GET_CODE (XEXP (ind, 1)) == PLUS
13604 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13605 ind = XEXP (ind, 1);
13606
13607 /* Match:
13608 (plus (reg)
13609 (const))
13610
13611 The encoded immediate for 16-bit modes is multiplied by 2,
13612 while the encoded immediate for 32-bit and 64-bit modes is
13613 multiplied by 4. */
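  /* For example, for SImode and DImode accesses FACTOR is 4, so legal offsets
     are multiples of 4 in [-1020, 1020]; for HImode FACTOR is 2, giving
     multiples of 2 in [-510, 510]. */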
13614 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13615 if (GET_CODE (ind) == PLUS
13616 && REG_P (XEXP (ind, 0))
13617 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13618 && CONST_INT_P (XEXP (ind, 1))
13619 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13620 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13621 return TRUE;
13622
13623 return FALSE;
13624 }
13625
13626 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13627 WB is true if full writeback address modes are allowed and is false
13628 if limited writeback address modes (POST_INC and PRE_DEC) are
13629 allowed. */
13630
13631 int arm_coproc_mem_operand (rtx op, bool wb)
13632 {
13633 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13634 }
13635
13636 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13637 context in which no writeback address modes are allowed. */
13638
13639 int
13640 arm_coproc_mem_operand_no_writeback (rtx op)
13641 {
13642 return arm_coproc_mem_operand_wb (op, 0);
13643 }
13644
13645 /* This function returns TRUE on matching mode and op.
13646 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13647 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13648 int
13649 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13650 {
13651 enum rtx_code code;
13652 int val, reg_no;
13653
13654 /* Match: (mem (reg)). */
13655 if (REG_P (op))
13656 {
13657 int reg_no = REGNO (op);
13658 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13659 ? reg_no <= LAST_LO_REGNUM
13660 : reg_no < LAST_ARM_REGNUM)
13661 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13662 }
13663 code = GET_CODE (op);
13664
13665 if (code == POST_INC || code == PRE_DEC
13666 || code == PRE_INC || code == POST_DEC)
13667 {
13668 reg_no = REGNO (XEXP (op, 0));
13669 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13670 ? reg_no <= LAST_LO_REGNUM
13671 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13672 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13673 }
13674 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13675 && GET_CODE (XEXP (op, 1)) == PLUS
13676 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13677 && REG_P (XEXP (op, 0))
13678 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13679 /* Make sure to only accept PLUS after reload_completed, otherwise
13680 this will interfere with auto_inc's pattern detection. */
13681 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13682 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13683 {
13684 reg_no = REGNO (XEXP (op, 0));
13685 if (code == PLUS)
13686 val = INTVAL (XEXP (op, 1));
13687 else
13688 val = INTVAL (XEXP(XEXP (op, 1), 1));
13689
13690 switch (mode)
13691 {
13692 case E_V16QImode:
13693 case E_V8QImode:
13694 case E_V4QImode:
13695 if (abs (val) > 127)
13696 return FALSE;
13697 break;
13698 case E_V8HImode:
13699 case E_V8HFmode:
13700 case E_V4HImode:
13701 case E_V4HFmode:
13702 if (val % 2 != 0 || abs (val) > 254)
13703 return FALSE;
13704 break;
13705 case E_V4SImode:
13706 case E_V4SFmode:
13707 if (val % 4 != 0 || abs (val) > 508)
13708 return FALSE;
13709 break;
13710 default:
13711 return FALSE;
13712 }
13713 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13714 || (MVE_STN_LDW_MODE (mode)
13715 ? reg_no <= LAST_LO_REGNUM
13716 : (reg_no < LAST_ARM_REGNUM
13717 && (code == PLUS || reg_no != SP_REGNUM))));
13718 }
13719 return FALSE;
13720 }
13721
13722 /* Return TRUE if OP is a memory operand from or to which we can load or
13723 store a vector. TYPE is one of the following values:
13724 0 - Vector load/store (vldr)
13725 1 - Core registers (ldm)
13726 2 - Element/structure loads (vld1)
13727 */
13728 int
13729 neon_vector_mem_operand (rtx op, int type, bool strict)
13730 {
13731 rtx ind;
13732
13733 /* Reject eliminable registers. */
13734 if (strict && ! (reload_in_progress || reload_completed)
13735 && (reg_mentioned_p (frame_pointer_rtx, op)
13736 || reg_mentioned_p (arg_pointer_rtx, op)
13737 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13738 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13739 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13740 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13741 return FALSE;
13742
13743 /* Constants are converted into offsets from labels. */
13744 if (!MEM_P (op))
13745 return FALSE;
13746
13747 ind = XEXP (op, 0);
13748
13749 if (reload_completed
13750 && (LABEL_REF_P (ind)
13751 || (GET_CODE (ind) == CONST
13752 && GET_CODE (XEXP (ind, 0)) == PLUS
13753 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13754 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13755 return TRUE;
13756
13757 /* Match: (mem (reg)). */
13758 if (REG_P (ind))
13759 return arm_address_register_rtx_p (ind, 0);
13760
13761 /* Allow post-increment with Neon registers. */
13762 if ((type != 1 && GET_CODE (ind) == POST_INC)
13763 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13764 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13765
13766 /* Allow post-increment by register for VLDn. */
13767 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13768 && GET_CODE (XEXP (ind, 1)) == PLUS
13769 && REG_P (XEXP (XEXP (ind, 1), 1))
13770 && REG_P (XEXP (ind, 0))
13771 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13772 return true;
13773
13774 /* Match:
13775 (plus (reg)
13776 (const)). */
13777 if (type == 0
13778 && GET_CODE (ind) == PLUS
13779 && REG_P (XEXP (ind, 0))
13780 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13781 && CONST_INT_P (XEXP (ind, 1))
13782 && INTVAL (XEXP (ind, 1)) > -1024
13783 /* For quad modes, we restrict the constant offset to be slightly less
13784 than what the instruction format permits. We have no such constraint
13785 on double mode offsets. (This must match arm_legitimate_index_p.) */
13786 && (INTVAL (XEXP (ind, 1))
13787 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13788 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13789 return TRUE;
13790
13791 return FALSE;
13792 }
13793
13794 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13795 type. */
13796 int
13797 mve_struct_mem_operand (rtx op)
13798 {
13799 rtx ind = XEXP (op, 0);
13800
13801 /* Match: (mem (reg)). */
13802 if (REG_P (ind))
13803 return arm_address_register_rtx_p (ind, 0);
13804
13805 /* Allow only post-increment by the mode size. */
13806 if (GET_CODE (ind) == POST_INC)
13807 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13808
13809 return FALSE;
13810 }
13811
13812 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13813 type. */
13814 int
13815 neon_struct_mem_operand (rtx op)
13816 {
13817 rtx ind;
13818
13819 /* Reject eliminable registers. */
13820 if (! (reload_in_progress || reload_completed)
13821 && ( reg_mentioned_p (frame_pointer_rtx, op)
13822 || reg_mentioned_p (arg_pointer_rtx, op)
13823 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13824 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13825 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13826 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13827 return FALSE;
13828
13829 /* Constants are converted into offsets from labels. */
13830 if (!MEM_P (op))
13831 return FALSE;
13832
13833 ind = XEXP (op, 0);
13834
13835 if (reload_completed
13836 && (LABEL_REF_P (ind)
13837 || (GET_CODE (ind) == CONST
13838 && GET_CODE (XEXP (ind, 0)) == PLUS
13839 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13840 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13841 return TRUE;
13842
13843 /* Match: (mem (reg)). */
13844 if (REG_P (ind))
13845 return arm_address_register_rtx_p (ind, 0);
13846
13847 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13848 if (GET_CODE (ind) == POST_INC
13849 || GET_CODE (ind) == PRE_DEC)
13850 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13851
13852 return FALSE;
13853 }
13854
13855 /* Prepares the operands for the VCMLA by lane instruction such that the right
13856 register number is selected. This instruction is special in that it always
13857 requires a D register; however, there is a choice to be made between Dn[0],
13858 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13859
13860 The VCMLA by lane function always selects two values. For instance given D0
13861 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13862 used by the instruction. However, given V4SF, indices 0 and 1 are both
13863 valid, selecting D0[0] or D1[0] respectively.
13864
13865 This function centralizes that information based on OPERANDS, OPERANDS[3]
13866 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13867 updated to contain the right index. */
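/* As an illustration: for a V4SFmode value held in q0 (d0/d1) with lane
   index 1, OPERANDS[3] is replaced by the constant 1 (selecting d1) and
   OPERANDS[4] by 0, i.e. D1[0] as described above. */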
13868
13869 rtx *
13870 neon_vcmla_lane_prepare_operands (rtx *operands)
13871 {
13872 int lane = INTVAL (operands[4]);
13873 machine_mode constmode = SImode;
13874 machine_mode mode = GET_MODE (operands[3]);
13875 int regno = REGNO (operands[3]);
13876 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13877 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13878 {
13879 operands[3] = gen_int_mode (regno + 1, constmode);
13880 operands[4]
13881 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13882 }
13883 else
13884 {
13885 operands[3] = gen_int_mode (regno, constmode);
13886 operands[4] = gen_int_mode (lane, constmode);
13887 }
13888 return operands;
13889 }
13890
13891
13892 /* Return true if X is a register that will be eliminated later on. */
13893 int
13894 arm_eliminable_register (rtx x)
13895 {
13896 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13897 || REGNO (x) == ARG_POINTER_REGNUM
13898 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13899 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13900 }
13901
13902 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13903 coprocessor registers. Otherwise return NO_REGS. */
13904
13905 enum reg_class
13906 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13907 {
13908 if (mode == HFmode)
13909 {
13910 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13911 return GENERAL_REGS;
13912 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13913 return NO_REGS;
13914 return GENERAL_REGS;
13915 }
13916
13917 /* The neon move patterns handle all legitimate vector and struct
13918 addresses. */
13919 if (TARGET_NEON
13920 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13921 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13922 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13923 || VALID_NEON_STRUCT_MODE (mode)))
13924 return NO_REGS;
13925
13926 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13927 return NO_REGS;
13928
13929 return GENERAL_REGS;
13930 }
13931
13932 /* Values which must be returned in the most-significant end of the return
13933 register. */
13934
13935 static bool
13936 arm_return_in_msb (const_tree valtype)
13937 {
13938 return (TARGET_AAPCS_BASED
13939 && BYTES_BIG_ENDIAN
13940 && (AGGREGATE_TYPE_P (valtype)
13941 || TREE_CODE (valtype) == COMPLEX_TYPE
13942 || FIXED_POINT_TYPE_P (valtype)));
13943 }
13944
13945 /* Return TRUE if X references a SYMBOL_REF. */
13946 int
13947 symbol_mentioned_p (rtx x)
13948 {
13949 const char * fmt;
13950 int i;
13951
13952 if (SYMBOL_REF_P (x))
13953 return 1;
13954
13955 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13956 are constant offsets, not symbols. */
13957 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13958 return 0;
13959
13960 fmt = GET_RTX_FORMAT (GET_CODE (x));
13961
13962 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13963 {
13964 if (fmt[i] == 'E')
13965 {
13966 int j;
13967
13968 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13969 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13970 return 1;
13971 }
13972 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13973 return 1;
13974 }
13975
13976 return 0;
13977 }
13978
13979 /* Return TRUE if X references a LABEL_REF. */
13980 int
13981 label_mentioned_p (rtx x)
13982 {
13983 const char * fmt;
13984 int i;
13985
13986 if (LABEL_REF_P (x))
13987 return 1;
13988
13989 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13990 instruction, but they are constant offsets, not symbols. */
13991 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13992 return 0;
13993
13994 fmt = GET_RTX_FORMAT (GET_CODE (x));
13995 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13996 {
13997 if (fmt[i] == 'E')
13998 {
13999 int j;
14000
14001 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14002 if (label_mentioned_p (XVECEXP (x, i, j)))
14003 return 1;
14004 }
14005 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14006 return 1;
14007 }
14008
14009 return 0;
14010 }
14011
14012 int
14013 tls_mentioned_p (rtx x)
14014 {
14015 switch (GET_CODE (x))
14016 {
14017 case CONST:
14018 return tls_mentioned_p (XEXP (x, 0));
14019
14020 case UNSPEC:
14021 if (XINT (x, 1) == UNSPEC_TLS)
14022 return 1;
14023
14024 /* Fall through. */
14025 default:
14026 return 0;
14027 }
14028 }
14029
14030 /* Must not copy any rtx that uses a pc-relative address.
14031 Also, disallow copying of load-exclusive instructions that
14032 may appear after splitting of compare-and-swap-style operations
14033 so as to prevent those loops from being transformed away from their
14034 canonical forms (see PR 69904). */
14035
14036 static bool
14037 arm_cannot_copy_insn_p (rtx_insn *insn)
14038 {
14039 /* The tls call insn cannot be copied, as it is paired with a data
14040 word. */
14041 if (recog_memoized (insn) == CODE_FOR_tlscall)
14042 return true;
14043
14044 subrtx_iterator::array_type array;
14045 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14046 {
14047 const_rtx x = *iter;
14048 if (GET_CODE (x) == UNSPEC
14049 && (XINT (x, 1) == UNSPEC_PIC_BASE
14050 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14051 return true;
14052 }
14053
14054 rtx set = single_set (insn);
14055 if (set)
14056 {
14057 rtx src = SET_SRC (set);
14058 if (GET_CODE (src) == ZERO_EXTEND)
14059 src = XEXP (src, 0);
14060
14061 /* Catch the load-exclusive and load-acquire operations. */
14062 if (GET_CODE (src) == UNSPEC_VOLATILE
14063 && (XINT (src, 1) == VUNSPEC_LL
14064 || XINT (src, 1) == VUNSPEC_LAX))
14065 return true;
14066 }
14067 return false;
14068 }
14069
14070 enum rtx_code
14071 minmax_code (rtx x)
14072 {
14073 enum rtx_code code = GET_CODE (x);
14074
14075 switch (code)
14076 {
14077 case SMAX:
14078 return GE;
14079 case SMIN:
14080 return LE;
14081 case UMIN:
14082 return LEU;
14083 case UMAX:
14084 return GEU;
14085 default:
14086 gcc_unreachable ();
14087 }
14088 }
14089
14090 /* Match pair of min/max operators that can be implemented via usat/ssat. */
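/* For example, clamping to [0, 255] gives *MASK = 8 with *SIGNED_SAT false
   (usat #8), while clamping to [-128, 127] gives *MASK = 8 with *SIGNED_SAT
   true (ssat #8), since 127 = 2^7 - 1 and -128 = -127 - 1. */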
14091
14092 bool
14093 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14094 int *mask, bool *signed_sat)
14095 {
14096 /* The high bound must be a power of two minus one. */
14097 int log = exact_log2 (INTVAL (hi_bound) + 1);
14098 if (log == -1)
14099 return false;
14100
14101 /* The low bound is either zero (for usat) or one less than the
14102 negation of the high bound (for ssat). */
14103 if (INTVAL (lo_bound) == 0)
14104 {
14105 if (mask)
14106 *mask = log;
14107 if (signed_sat)
14108 *signed_sat = false;
14109
14110 return true;
14111 }
14112
14113 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14114 {
14115 if (mask)
14116 *mask = log + 1;
14117 if (signed_sat)
14118 *signed_sat = true;
14119
14120 return true;
14121 }
14122
14123 return false;
14124 }
14125
14126 /* Return 1 if memory locations are adjacent. */
14127 int
14128 adjacent_mem_locations (rtx a, rtx b)
14129 {
14130 /* We don't guarantee to preserve the order of these memory refs. */
14131 if (volatile_refs_p (a) || volatile_refs_p (b))
14132 return 0;
14133
14134 if ((REG_P (XEXP (a, 0))
14135 || (GET_CODE (XEXP (a, 0)) == PLUS
14136 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14137 && (REG_P (XEXP (b, 0))
14138 || (GET_CODE (XEXP (b, 0)) == PLUS
14139 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14140 {
14141 HOST_WIDE_INT val0 = 0, val1 = 0;
14142 rtx reg0, reg1;
14143 int val_diff;
14144
14145 if (GET_CODE (XEXP (a, 0)) == PLUS)
14146 {
14147 reg0 = XEXP (XEXP (a, 0), 0);
14148 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14149 }
14150 else
14151 reg0 = XEXP (a, 0);
14152
14153 if (GET_CODE (XEXP (b, 0)) == PLUS)
14154 {
14155 reg1 = XEXP (XEXP (b, 0), 0);
14156 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14157 }
14158 else
14159 reg1 = XEXP (b, 0);
14160
14161 /* Don't accept any offset that will require multiple
14162 instructions to handle, since this would cause the
14163 arith_adjacentmem pattern to output an overlong sequence. */
14164 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14165 return 0;
14166
14167 /* Don't allow an eliminable register: register elimination can make
14168 the offset too large. */
14169 if (arm_eliminable_register (reg0))
14170 return 0;
14171
14172 val_diff = val1 - val0;
14173
14174 if (arm_ld_sched)
14175 {
14176 /* If the target has load delay slots, then there's no benefit
14177 to using an ldm instruction unless the offset is zero and
14178 we are optimizing for size. */
14179 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14180 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14181 && (val_diff == 4 || val_diff == -4));
14182 }
14183
14184 return ((REGNO (reg0) == REGNO (reg1))
14185 && (val_diff == 4 || val_diff == -4));
14186 }
14187
14188 return 0;
14189 }
14190
14191 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14192 for load operations, false for store operations. CONSECUTIVE is true
14193 if the register numbers in the operation must be consecutive in the register
14194 bank. RETURN_PC is true if the value is to be loaded into the PC.
14195 The pattern we are trying to match for load is:
14196 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14197 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14198 :
14199 :
14200 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14201 ]
14202 where
14203 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14204 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14205 3. If consecutive is TRUE, then for kth register being loaded,
14206 REGNO (R_dk) = REGNO (R_d0) + k.
14207 The pattern for store is similar. */
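/* For example, an ldmia of { r4, r5, r6 } from a base register rb (register
names chosen purely for illustration) would match as:
[(set (reg:SI r4) (mem:SI (reg:SI rb)))
(set (reg:SI r5) (mem:SI (plus:SI (reg:SI rb) (const_int 4))))
(set (reg:SI r6) (mem:SI (plus:SI (reg:SI rb) (const_int 8))))]  */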
14208 bool
14209 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14210 bool consecutive, bool return_pc)
14211 {
14212 HOST_WIDE_INT count = XVECLEN (op, 0);
14213 rtx reg, mem, addr;
14214 unsigned regno;
14215 unsigned first_regno;
14216 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14217 rtx elt;
14218 bool addr_reg_in_reglist = false;
14219 bool update = false;
14220 int reg_increment;
14221 int offset_adj;
14222 int regs_per_val;
14223
14224 /* If not in SImode, then registers must be consecutive
14225 (e.g., VLDM instructions for DFmode). */
14226 gcc_assert ((mode == SImode) || consecutive);
14227 /* Setting return_pc for stores is illegal. */
14228 gcc_assert (!return_pc || load);
14229
14230 /* Set up the increments and the regs per val based on the mode. */
14231 reg_increment = GET_MODE_SIZE (mode);
14232 regs_per_val = reg_increment / 4;
14233 offset_adj = return_pc ? 1 : 0;
14234
14235 if (count <= 1
14236 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14237 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14238 return false;
14239
14240 /* Check if this is a write-back. */
14241 elt = XVECEXP (op, 0, offset_adj);
14242 if (GET_CODE (SET_SRC (elt)) == PLUS)
14243 {
14244 i++;
14245 base = 1;
14246 update = true;
14247
14248 /* The offset adjustment must be the number of registers being
14249 transferred times the size of a single register. */
14250 if (!REG_P (SET_DEST (elt))
14251 || !REG_P (XEXP (SET_SRC (elt), 0))
14252 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14253 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14254 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14255 ((count - 1 - offset_adj) * reg_increment))
14256 return false;
14257 }
14258
14259 i = i + offset_adj;
14260 base = base + offset_adj;
14261 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14262 success depends on the type: VLDM can do just one reg,
14263 LDM must do at least two. */
14264 if ((count <= i) && (mode == SImode))
14265 return false;
14266
14267 elt = XVECEXP (op, 0, i - 1);
14268 if (GET_CODE (elt) != SET)
14269 return false;
14270
14271 if (load)
14272 {
14273 reg = SET_DEST (elt);
14274 mem = SET_SRC (elt);
14275 }
14276 else
14277 {
14278 reg = SET_SRC (elt);
14279 mem = SET_DEST (elt);
14280 }
14281
14282 if (!REG_P (reg) || !MEM_P (mem))
14283 return false;
14284
14285 regno = REGNO (reg);
14286 first_regno = regno;
14287 addr = XEXP (mem, 0);
14288 if (GET_CODE (addr) == PLUS)
14289 {
14290 if (!CONST_INT_P (XEXP (addr, 1)))
14291 return false;
14292
14293 offset = INTVAL (XEXP (addr, 1));
14294 addr = XEXP (addr, 0);
14295 }
14296
14297 if (!REG_P (addr))
14298 return false;
14299
14300 /* Don't allow SP to be loaded unless it is also the base register. It
14301 guarantees that SP is reset correctly when an LDM instruction
14302 is interrupted. Otherwise, we might end up with a corrupt stack. */
14303 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14304 return false;
14305
14306 if (regno == REGNO (addr))
14307 addr_reg_in_reglist = true;
14308
14309 for (; i < count; i++)
14310 {
14311 elt = XVECEXP (op, 0, i);
14312 if (GET_CODE (elt) != SET)
14313 return false;
14314
14315 if (load)
14316 {
14317 reg = SET_DEST (elt);
14318 mem = SET_SRC (elt);
14319 }
14320 else
14321 {
14322 reg = SET_SRC (elt);
14323 mem = SET_DEST (elt);
14324 }
14325
14326 if (!REG_P (reg)
14327 || GET_MODE (reg) != mode
14328 || REGNO (reg) <= regno
14329 || (consecutive
14330 && (REGNO (reg) !=
14331 (unsigned int) (first_regno + regs_per_val * (i - base))))
14332 /* Don't allow SP to be loaded unless it is also the base register. It
14333 guarantees that SP is reset correctly when an LDM instruction
14334 is interrupted. Otherwise, we might end up with a corrupt stack. */
14335 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14336 || !MEM_P (mem)
14337 || GET_MODE (mem) != mode
14338 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14339 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14340 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14341 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14342 offset + (i - base) * reg_increment))
14343 && (!REG_P (XEXP (mem, 0))
14344 || offset + (i - base) * reg_increment != 0)))
14345 return false;
14346
14347 regno = REGNO (reg);
14348 if (regno == REGNO (addr))
14349 addr_reg_in_reglist = true;
14350 }
14351
14352 if (load)
14353 {
14354 if (update && addr_reg_in_reglist)
14355 return false;
14356
14357 /* For Thumb-1, the address register is always modified, either by write-back
14358 or by an explicit load. If the pattern does not describe an update,
14359 then the address register must be in the list of loaded registers. */
14360 if (TARGET_THUMB1)
14361 return update || addr_reg_in_reglist;
14362 }
14363
14364 return true;
14365 }
14366
14367 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14368 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14369 following form:
14370
14371 [(set (reg:SI <N>) (const_int 0))
14372 (set (reg:SI <M>) (const_int 0))
14373 ...
14374 (unspec_volatile [(const_int 0)]
14375 VUNSPEC_CLRM_APSR)
14376 (clobber (reg:CC CC_REGNUM))
14377 ]
14378
14379 Any number (including 0) of set expressions is valid; the volatile unspec is
14380 optional. All registers except SP and PC are allowed, and the registers must
14381 be in strictly increasing order.
14382
14383 To be a valid VSCCLRM pattern, OP must have the following form:
14384
14385 [(unspec_volatile [(const_int 0)]
14386 VUNSPEC_VSCCLRM_VPR)
14387 (set (reg:SF <N>) (const_int 0))
14388 (set (reg:SF <M>) (const_int 0))
14389 ...
14390 ]
14391
14392 As with CLRM, any number (including 0) of set expressions is valid; however,
14393 the volatile unspec is mandatory here. Any VFP single-precision register is
14394 accepted, but all registers must be consecutive and in increasing order. */
14395
14396 bool
14397 clear_operation_p (rtx op, bool vfp)
14398 {
14399 unsigned regno;
14400 unsigned last_regno = INVALID_REGNUM;
14401 rtx elt, reg, zero;
14402 int count = XVECLEN (op, 0);
14403 int first_set = vfp ? 1 : 0;
14404 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14405
14406 for (int i = first_set; i < count; i++)
14407 {
14408 elt = XVECEXP (op, 0, i);
14409
14410 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14411 {
14412 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14413 || XVECLEN (elt, 0) != 1
14414 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14415 || i != count - 2)
14416 return false;
14417
14418 continue;
14419 }
14420
14421 if (GET_CODE (elt) == CLOBBER)
14422 continue;
14423
14424 if (GET_CODE (elt) != SET)
14425 return false;
14426
14427 reg = SET_DEST (elt);
14428 zero = SET_SRC (elt);
14429
14430 if (!REG_P (reg)
14431 || GET_MODE (reg) != expected_mode
14432 || zero != CONST0_RTX (SImode))
14433 return false;
14434
14435 regno = REGNO (reg);
14436
14437 if (vfp)
14438 {
14439 if (i != first_set && regno != last_regno + 1)
14440 return false;
14441 }
14442 else
14443 {
14444 if (regno == SP_REGNUM || regno == PC_REGNUM)
14445 return false;
14446 if (i != first_set && regno <= last_regno)
14447 return false;
14448 }
14449
14450 last_regno = regno;
14451 }
14452
14453 return true;
14454 }
14455
14456 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14457 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14458 instruction. ADD_OFFSET is nonzero if the base address register needs
14459 to be modified with an add instruction before we can use it. */
14460
14461 static bool
14462 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14463 int nops, HOST_WIDE_INT add_offset)
14464 {
14465 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14466 if the offset isn't small enough. The reason 2 ldrs are faster
14467 is because these ARMs are able to do more than one cache access
14468 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14469 whilst the ARM8 has a double bandwidth cache. This means that
14470 these cores can do both an instruction fetch and a data fetch in
14471 a single cycle, so the trick of calculating the address into a
14472 scratch register (one of the result regs) and then doing a load
14473 multiple actually becomes slower (and no smaller in code size).
14474 That is the transformation
14475
14476 ldr rd1, [rbase + offset]
14477 ldr rd2, [rbase + offset + 4]
14478
14479 to
14480
14481 add rd1, rbase, offset
14482 ldmia rd1, {rd1, rd2}
14483
14484 produces worse code -- '3 cycles + any stalls on rd2' instead of
14485 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14486 access per cycle, the first sequence could never complete in less
14487 than 6 cycles, whereas the ldm sequence would only take 5 and
14488 would make better use of sequential accesses if not hitting the
14489 cache.
14490
14491 We cheat here and test 'arm_ld_sched' which we currently know to
14492 only be true for the ARM8, ARM9 and StrongARM. If this ever
14493 changes, then the test below needs to be reworked. */
14494 if (nops == 2 && arm_ld_sched && add_offset != 0)
14495 return false;
14496
14497 /* XScale has load-store double instructions, but they have stricter
14498 alignment requirements than load-store multiple, so we cannot
14499 use them.
14500
14501 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14502 the pipeline until completion.
14503
14504 NREGS CYCLES
14505 1 3
14506 2 4
14507 3 5
14508 4 6
14509
14510 An ldr instruction takes 1-3 cycles, but does not block the
14511 pipeline.
14512
14513 NREGS CYCLES
14514 1 1-3
14515 2 2-6
14516 3 3-9
14517 4 4-12
14518
14519 Best case ldr will always win. However, the more ldr instructions
14520 we issue, the less likely we are to be able to schedule them well.
14521 Using ldr instructions also increases code size.
14522
14523 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14524 for counts of 3 or 4 regs. */
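/* For example, on XScale a two-register ldm takes 4 cycles and blocks the
pipeline, whereas two independent ldr instructions can complete in as few
as 2 cycles, so for one or two registers separate loads are preferred
unless optimizing for size.  */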
14525 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14526 return false;
14527 return true;
14528 }
14529
14530 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14531 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14532 an array ORDER which describes the sequence to use when accessing the
14533 offsets that produces an ascending order. In this sequence, each
14534 offset must be larger by exactly 4 than the previous one. ORDER[0]
14535 must have been filled in with the lowest offset by the caller.
14536 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14537 we use to verify that ORDER produces an ascending order of registers.
14538 Return true if it was possible to construct such an order, false if
14539 not. */
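/* For example, with NOPS == 4, UNSORTED_OFFSETS == { 8, 0, 12, 4 } and
ORDER[0] preset to 1 (the index of offset 0), ORDER is filled in as
{ 1, 3, 0, 2 }, i.e. offsets 0, 4, 8, 12.  */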
14540
14541 static bool
14542 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14543 int *unsorted_regs)
14544 {
14545 int i;
14546 for (i = 1; i < nops; i++)
14547 {
14548 int j;
14549
14550 order[i] = order[i - 1];
14551 for (j = 0; j < nops; j++)
14552 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14553 {
14554 /* We must find exactly one offset that is higher than the
14555 previous one by 4. */
14556 if (order[i] != order[i - 1])
14557 return false;
14558 order[i] = j;
14559 }
14560 if (order[i] == order[i - 1])
14561 return false;
14562 /* The register numbers must be ascending. */
14563 if (unsorted_regs != NULL
14564 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14565 return false;
14566 }
14567 return true;
14568 }
14569
14570 /* Used to determine in a peephole whether a sequence of load
14571 instructions can be changed into a load-multiple instruction.
14572 NOPS is the number of separate load instructions we are examining. The
14573 first NOPS entries in OPERANDS are the destination registers, the
14574 next NOPS entries are memory operands. If this function is
14575 successful, *BASE is set to the common base register of the memory
14576 accesses; *LOAD_OFFSET is set to the first memory location's offset
14577 from that base register.
14578 REGS is an array filled in with the destination register numbers.
14579 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14580 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14581 the sequence of registers in REGS matches the loads from ascending memory
14582 locations, and the function verifies that the register numbers are
14583 themselves ascending. If CHECK_REGS is false, the register numbers
14584 are stored in the order they are found in the operands. */
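/* The return value is 0 on failure; otherwise it is the ldm_case computed
below: 1 for LDMIA, 2 for LDMIB, 3 for LDMDA, 4 for LDMDB, or 5 when the
base address must first be adjusted with a separate add instruction.  */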
14585 static int
14586 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14587 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14588 {
14589 int unsorted_regs[MAX_LDM_STM_OPS];
14590 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14591 int order[MAX_LDM_STM_OPS];
14592 int base_reg = -1;
14593 int i, ldm_case;
14594
14595 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14596 easily extended if required. */
14597 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14598
14599 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14600
14601 /* Loop over the operands and check that the memory references are
14602 suitable (i.e. immediate offsets from the same base register). At
14603 the same time, extract the target register, and the memory
14604 offsets. */
14605 for (i = 0; i < nops; i++)
14606 {
14607 rtx reg;
14608 rtx offset;
14609
14610 /* Convert a subreg of a mem into the mem itself. */
14611 if (GET_CODE (operands[nops + i]) == SUBREG)
14612 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14613
14614 gcc_assert (MEM_P (operands[nops + i]));
14615
14616 /* Don't reorder volatile memory references; it doesn't seem worth
14617 looking for the case where the order is ok anyway. */
14618 if (MEM_VOLATILE_P (operands[nops + i]))
14619 return 0;
14620
14621 offset = const0_rtx;
14622
14623 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14624 || (SUBREG_P (reg)
14625 && REG_P (reg = SUBREG_REG (reg))))
14626 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14627 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14628 || (SUBREG_P (reg)
14629 && REG_P (reg = SUBREG_REG (reg))))
14630 && (CONST_INT_P (offset
14631 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14632 {
14633 if (i == 0)
14634 {
14635 base_reg = REGNO (reg);
14636 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14637 return 0;
14638 }
14639 else if (base_reg != (int) REGNO (reg))
14640 /* Not addressed from the same base register. */
14641 return 0;
14642
14643 unsorted_regs[i] = (REG_P (operands[i])
14644 ? REGNO (operands[i])
14645 : REGNO (SUBREG_REG (operands[i])));
14646
14647 /* If it isn't an integer register, or if it overwrites the
14648 base register but isn't the last insn in the list, then
14649 we can't do this. */
14650 if (unsorted_regs[i] < 0
14651 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14652 || unsorted_regs[i] > 14
14653 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14654 return 0;
14655
14656 /* Don't allow SP to be loaded unless it is also the base
14657 register. It guarantees that SP is reset correctly when
14658 an LDM instruction is interrupted. Otherwise, we might
14659 end up with a corrupt stack. */
14660 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14661 return 0;
14662
14663 unsorted_offsets[i] = INTVAL (offset);
14664 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14665 order[0] = i;
14666 }
14667 else
14668 /* Not a suitable memory address. */
14669 return 0;
14670 }
14671
14672 /* All the useful information has now been extracted from the
14673 operands into unsorted_regs and unsorted_offsets; additionally,
14674 order[0] has been set to the lowest offset in the list. Sort
14675 the offsets into order, verifying that they are adjacent, and
14676 check that the register numbers are ascending. */
14677 if (!compute_offset_order (nops, unsorted_offsets, order,
14678 check_regs ? unsorted_regs : NULL))
14679 return 0;
14680
14681 if (saved_order)
14682 memcpy (saved_order, order, sizeof order);
14683
14684 if (base)
14685 {
14686 *base = base_reg;
14687
14688 for (i = 0; i < nops; i++)
14689 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14690
14691 *load_offset = unsorted_offsets[order[0]];
14692 }
14693
14694 if (unsorted_offsets[order[0]] == 0)
14695 ldm_case = 1; /* ldmia */
14696 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14697 ldm_case = 2; /* ldmib */
14698 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14699 ldm_case = 3; /* ldmda */
14700 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14701 ldm_case = 4; /* ldmdb */
14702 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14703 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14704 ldm_case = 5;
14705 else
14706 return 0;
14707
14708 if (!multiple_operation_profitable_p (false, nops,
14709 ldm_case == 5
14710 ? unsorted_offsets[order[0]] : 0))
14711 return 0;
14712
14713 return ldm_case;
14714 }
14715
14716 /* Used to determine in a peephole whether a sequence of store instructions can
14717 be changed into a store-multiple instruction.
14718 NOPS is the number of separate store instructions we are examining.
14719 NOPS_TOTAL is the total number of instructions recognized by the peephole
14720 pattern.
14721 The first NOPS entries in OPERANDS are the source registers, the next
14722 NOPS entries are memory operands. If this function is successful, *BASE is
14723 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14724 to the first memory location's offset from that base register. REGS is an
14725 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14726 likewise filled with the corresponding rtx's.
14727 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14728 numbers to an ascending order of stores.
14729 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14730 from ascending memory locations, and the function verifies that the register
14731 numbers are themselves ascending. If CHECK_REGS is false, the register
14732 numbers are stored in the order they are found in the operands. */
14733 static int
14734 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14735 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14736 HOST_WIDE_INT *load_offset, bool check_regs)
14737 {
14738 int unsorted_regs[MAX_LDM_STM_OPS];
14739 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14740 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14741 int order[MAX_LDM_STM_OPS];
14742 int base_reg = -1;
14743 rtx base_reg_rtx = NULL;
14744 int i, stm_case;
14745
14746 /* Write-back of the base register is currently only supported for Thumb-1. */
14747 int base_writeback = TARGET_THUMB1;
14748
14749 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14750 easily extended if required. */
14751 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14752
14753 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14754
14755 /* Loop over the operands and check that the memory references are
14756 suitable (i.e. immediate offsets from the same base register). At
14757 the same time, extract the target register, and the memory
14758 offsets. */
14759 for (i = 0; i < nops; i++)
14760 {
14761 rtx reg;
14762 rtx offset;
14763
14764 /* Convert a subreg of a mem into the mem itself. */
14765 if (GET_CODE (operands[nops + i]) == SUBREG)
14766 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14767
14768 gcc_assert (MEM_P (operands[nops + i]));
14769
14770 /* Don't reorder volatile memory references; it doesn't seem worth
14771 looking for the case where the order is ok anyway. */
14772 if (MEM_VOLATILE_P (operands[nops + i]))
14773 return 0;
14774
14775 offset = const0_rtx;
14776
14777 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14778 || (SUBREG_P (reg)
14779 && REG_P (reg = SUBREG_REG (reg))))
14780 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14781 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14782 || (SUBREG_P (reg)
14783 && REG_P (reg = SUBREG_REG (reg))))
14784 && (CONST_INT_P (offset
14785 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14786 {
14787 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14788 ? operands[i] : SUBREG_REG (operands[i]));
14789 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14790
14791 if (i == 0)
14792 {
14793 base_reg = REGNO (reg);
14794 base_reg_rtx = reg;
14795 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14796 return 0;
14797 }
14798 else if (base_reg != (int) REGNO (reg))
14799 /* Not addressed from the same base register. */
14800 return 0;
14801
14802 /* If it isn't an integer register, then we can't do this. */
14803 if (unsorted_regs[i] < 0
14804 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14805 /* The effects are unpredictable if the base register is
14806 both updated and stored. */
14807 || (base_writeback && unsorted_regs[i] == base_reg)
14808 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14809 || unsorted_regs[i] > 14)
14810 return 0;
14811
14812 unsorted_offsets[i] = INTVAL (offset);
14813 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14814 order[0] = i;
14815 }
14816 else
14817 /* Not a suitable memory address. */
14818 return 0;
14819 }
14820
14821 /* All the useful information has now been extracted from the
14822 operands into unsorted_regs and unsorted_offsets; additionally,
14823 order[0] has been set to the lowest offset in the list. Sort
14824 the offsets into order, verifying that they are adjacent, and
14825 check that the register numbers are ascending. */
14826 if (!compute_offset_order (nops, unsorted_offsets, order,
14827 check_regs ? unsorted_regs : NULL))
14828 return 0;
14829
14830 if (saved_order)
14831 memcpy (saved_order, order, sizeof order);
14832
14833 if (base)
14834 {
14835 *base = base_reg;
14836
14837 for (i = 0; i < nops; i++)
14838 {
14839 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14840 if (reg_rtxs)
14841 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14842 }
14843
14844 *load_offset = unsorted_offsets[order[0]];
14845 }
14846
14847 if (TARGET_THUMB1
14848 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14849 return 0;
14850
14851 if (unsorted_offsets[order[0]] == 0)
14852 stm_case = 1; /* stmia */
14853 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14854 stm_case = 2; /* stmib */
14855 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14856 stm_case = 3; /* stmda */
14857 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14858 stm_case = 4; /* stmdb */
14859 else
14860 return 0;
14861
14862 if (!multiple_operation_profitable_p (false, nops, 0))
14863 return 0;
14864
14865 return stm_case;
14866 }
14867 \f
14868 /* Routines for use in generating RTL. */
14869
14870 /* Generate a load-multiple instruction. COUNT is the number of loads in
14871 the instruction; REGS and MEMS are arrays containing the operands.
14872 BASEREG is the base register to be used in addressing the memory operands.
14873 WBACK_OFFSET is nonzero if the instruction should update the base
14874 register. */
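/* For example, with COUNT == 2, REGS == { 4, 5 } and WBACK_OFFSET == 8 the
result is the parallel
[(set BASEREG (plus BASEREG (const_int 8)))
(set (reg:SI 4) MEMS[0])
(set (reg:SI 5) MEMS[1])]
i.e. an ldmia with write-back, assuming MEMS address BASEREG and
BASEREG + 4.  When the operation is not deemed profitable, a sequence of
single loads (plus the base update) is emitted instead.  */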
14875
14876 static rtx
14877 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14878 HOST_WIDE_INT wback_offset)
14879 {
14880 int i = 0, j;
14881 rtx result;
14882
14883 if (!multiple_operation_profitable_p (false, count, 0))
14884 {
14885 rtx seq;
14886
14887 start_sequence ();
14888
14889 for (i = 0; i < count; i++)
14890 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14891
14892 if (wback_offset != 0)
14893 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14894
14895 seq = get_insns ();
14896 end_sequence ();
14897
14898 return seq;
14899 }
14900
14901 result = gen_rtx_PARALLEL (VOIDmode,
14902 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14903 if (wback_offset != 0)
14904 {
14905 XVECEXP (result, 0, 0)
14906 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14907 i = 1;
14908 count++;
14909 }
14910
14911 for (j = 0; i < count; i++, j++)
14912 XVECEXP (result, 0, i)
14913 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14914
14915 return result;
14916 }
14917
14918 /* Generate a store-multiple instruction. COUNT is the number of stores in
14919 the instruction; REGS and MEMS are arrays containing the operands.
14920 BASEREG is the base register to be used in addressing the memory operands.
14921 WBACK_OFFSET is nonzero if the instruction should update the base
14922 register. */
14923
14924 static rtx
14925 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14926 HOST_WIDE_INT wback_offset)
14927 {
14928 int i = 0, j;
14929 rtx result;
14930
14931 if (GET_CODE (basereg) == PLUS)
14932 basereg = XEXP (basereg, 0);
14933
14934 if (!multiple_operation_profitable_p (false, count, 0))
14935 {
14936 rtx seq;
14937
14938 start_sequence ();
14939
14940 for (i = 0; i < count; i++)
14941 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14942
14943 if (wback_offset != 0)
14944 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14945
14946 seq = get_insns ();
14947 end_sequence ();
14948
14949 return seq;
14950 }
14951
14952 result = gen_rtx_PARALLEL (VOIDmode,
14953 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14954 if (wback_offset != 0)
14955 {
14956 XVECEXP (result, 0, 0)
14957 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14958 i = 1;
14959 count++;
14960 }
14961
14962 for (j = 0; i < count; i++, j++)
14963 XVECEXP (result, 0, i)
14964 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14965
14966 return result;
14967 }
14968
14969 /* Generate either a load-multiple or a store-multiple instruction. This
14970 function can be used in situations where we can start with a single MEM
14971 rtx and adjust its address upwards.
14972 COUNT is the number of operations in the instruction, not counting a
14973 possible update of the base register. REGS is an array containing the
14974 register operands.
14975 BASEREG is the base register to be used in addressing the memory operands,
14976 which are constructed from BASEMEM.
14977 WRITE_BACK specifies whether the generated instruction should include an
14978 update of the base register.
14979 OFFSETP is used to pass an offset to and from this function; this offset
14980 is not used when constructing the address (instead BASEMEM should have an
14981 appropriate offset in its address), it is used only for setting
14982 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14983
14984 static rtx
14985 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14986 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14987 {
14988 rtx mems[MAX_LDM_STM_OPS];
14989 HOST_WIDE_INT offset = *offsetp;
14990 int i;
14991
14992 gcc_assert (count <= MAX_LDM_STM_OPS);
14993
14994 if (GET_CODE (basereg) == PLUS)
14995 basereg = XEXP (basereg, 0);
14996
14997 for (i = 0; i < count; i++)
14998 {
14999 rtx addr = plus_constant (Pmode, basereg, i * 4);
15000 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15001 offset += 4;
15002 }
15003
15004 if (write_back)
15005 *offsetp = offset;
15006
15007 if (is_load)
15008 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15009 write_back ? 4 * count : 0);
15010 else
15011 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15012 write_back ? 4 * count : 0);
15013 }
15014
15015 rtx
15016 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15017 rtx basemem, HOST_WIDE_INT *offsetp)
15018 {
15019 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15020 offsetp);
15021 }
15022
15023 rtx
15024 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15025 rtx basemem, HOST_WIDE_INT *offsetp)
15026 {
15027 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15028 offsetp);
15029 }
15030
15031 /* Called from a peephole2 expander to turn a sequence of loads into an
15032 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15033 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15034 is true if we can reorder the registers because they are used commutatively
15035 subsequently.
15036 Returns true iff we could generate a new instruction. */
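/* For example, four loads of r3, r2, r1 and r0 from [rbase], [rbase, #4],
[rbase, #8] and [rbase, #12] cannot keep that register/address pairing in
a single ldm, but if the loaded values are only used commutatively
afterwards (say, all added together), SORT_REGS allows the register list
to be permuted so that one ldmia rbase, {r0-r3} can be emitted.  */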
15037
15038 bool
15039 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15040 {
15041 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15042 rtx mems[MAX_LDM_STM_OPS];
15043 int i, j, base_reg;
15044 rtx base_reg_rtx;
15045 HOST_WIDE_INT offset;
15046 int write_back = FALSE;
15047 int ldm_case;
15048 rtx addr;
15049
15050 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15051 &base_reg, &offset, !sort_regs);
15052
15053 if (ldm_case == 0)
15054 return false;
15055
15056 if (sort_regs)
15057 for (i = 0; i < nops - 1; i++)
15058 for (j = i + 1; j < nops; j++)
15059 if (regs[i] > regs[j])
15060 {
15061 int t = regs[i];
15062 regs[i] = regs[j];
15063 regs[j] = t;
15064 }
15065 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15066
15067 if (TARGET_THUMB1)
15068 {
15069 gcc_assert (ldm_case == 1 || ldm_case == 5);
15070
15071 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15072 write_back = true;
15073 for (i = 0; i < nops; i++)
15074 if (base_reg == regs[i])
15075 write_back = false;
15076
15077 /* Ensure the base is dead if it is updated. */
15078 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15079 return false;
15080 }
15081
15082 if (ldm_case == 5)
15083 {
15084 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15085 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15086 offset = 0;
15087 base_reg_rtx = newbase;
15088 }
15089
15090 for (i = 0; i < nops; i++)
15091 {
15092 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15093 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15094 SImode, addr, 0);
15095 }
15096 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15097 write_back ? offset + i * 4 : 0));
15098 return true;
15099 }
15100
15101 /* Called from a peephole2 expander to turn a sequence of stores into an
15102 STM instruction. OPERANDS are the operands found by the peephole matcher;
15103 NOPS indicates how many separate stores we are trying to combine.
15104 Returns true iff we could generate a new instruction. */
15105
15106 bool
15107 gen_stm_seq (rtx *operands, int nops)
15108 {
15109 int i;
15110 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15111 rtx mems[MAX_LDM_STM_OPS];
15112 int base_reg;
15113 rtx base_reg_rtx;
15114 HOST_WIDE_INT offset;
15115 int write_back = FALSE;
15116 int stm_case;
15117 rtx addr;
15118 bool base_reg_dies;
15119
15120 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15121 mem_order, &base_reg, &offset, true);
15122
15123 if (stm_case == 0)
15124 return false;
15125
15126 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15127
15128 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15129 if (TARGET_THUMB1)
15130 {
15131 gcc_assert (base_reg_dies);
15132 write_back = TRUE;
15133 }
15134
15135 if (stm_case == 5)
15136 {
15137 gcc_assert (base_reg_dies);
15138 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15139 offset = 0;
15140 }
15141
15142 addr = plus_constant (Pmode, base_reg_rtx, offset);
15143
15144 for (i = 0; i < nops; i++)
15145 {
15146 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15147 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15148 SImode, addr, 0);
15149 }
15150 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15151 write_back ? offset + i * 4 : 0));
15152 return true;
15153 }
15154
15155 /* Called from a peephole2 expander to turn a sequence of stores that are
15156 preceded by constant loads into an STM instruction. OPERANDS are the
15157 operands found by the peephole matcher; NOPS indicates how many
15158 separate stores we are trying to combine; there are 2 * NOPS
15159 instructions in the peephole.
15160 Returns true iff we could generate a new instruction. */
15161
15162 bool
15163 gen_const_stm_seq (rtx *operands, int nops)
15164 {
15165 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15166 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15167 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15168 rtx mems[MAX_LDM_STM_OPS];
15169 int base_reg;
15170 rtx base_reg_rtx;
15171 HOST_WIDE_INT offset;
15172 int write_back = FALSE;
15173 int stm_case;
15174 rtx addr;
15175 bool base_reg_dies;
15176 int i, j;
15177 HARD_REG_SET allocated;
15178
15179 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15180 mem_order, &base_reg, &offset, false);
15181
15182 if (stm_case == 0)
15183 return false;
15184
15185 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15186
15187 /* If the same register is used more than once, try to find a free
15188 register. */
15189 CLEAR_HARD_REG_SET (allocated);
15190 for (i = 0; i < nops; i++)
15191 {
15192 for (j = i + 1; j < nops; j++)
15193 if (regs[i] == regs[j])
15194 {
15195 rtx t = peep2_find_free_register (0, nops * 2,
15196 TARGET_THUMB1 ? "l" : "r",
15197 SImode, &allocated);
15198 if (t == NULL_RTX)
15199 return false;
15200 reg_rtxs[i] = t;
15201 regs[i] = REGNO (t);
15202 }
15203 }
15204
15205 /* Compute an ordering that maps the register numbers to an ascending
15206 sequence. */
15207 reg_order[0] = 0;
15208 for (i = 0; i < nops; i++)
15209 if (regs[i] < regs[reg_order[0]])
15210 reg_order[0] = i;
15211
15212 for (i = 1; i < nops; i++)
15213 {
15214 int this_order = reg_order[i - 1];
15215 for (j = 0; j < nops; j++)
15216 if (regs[j] > regs[reg_order[i - 1]]
15217 && (this_order == reg_order[i - 1]
15218 || regs[j] < regs[this_order]))
15219 this_order = j;
15220 reg_order[i] = this_order;
15221 }
15222
15223 /* Ensure that registers that must be live after the instruction end
15224 up with the correct value. */
15225 for (i = 0; i < nops; i++)
15226 {
15227 int this_order = reg_order[i];
15228 if ((this_order != mem_order[i]
15229 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15230 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15231 return false;
15232 }
15233
15234 /* Load the constants. */
15235 for (i = 0; i < nops; i++)
15236 {
15237 rtx op = operands[2 * nops + mem_order[i]];
15238 sorted_regs[i] = regs[reg_order[i]];
15239 emit_move_insn (reg_rtxs[reg_order[i]], op);
15240 }
15241
15242 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15243
15244 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15245 if (TARGET_THUMB1)
15246 {
15247 gcc_assert (base_reg_dies);
15248 write_back = TRUE;
15249 }
15250
15251 if (stm_case == 5)
15252 {
15253 gcc_assert (base_reg_dies);
15254 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15255 offset = 0;
15256 }
15257
15258 addr = plus_constant (Pmode, base_reg_rtx, offset);
15259
15260 for (i = 0; i < nops; i++)
15261 {
15262 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15263 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15264 SImode, addr, 0);
15265 }
15266 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15267 write_back ? offset + i * 4 : 0));
15268 return true;
15269 }
15270
15271 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15272 unaligned copies on processors which support unaligned semantics for those
15273 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15274 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15275 An interleave factor of 1 (the minimum) will perform no interleaving.
15276 Load/store multiple are used for aligned addresses where possible. */
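/* For example, with INTERLEAVE_FACTOR == 2 each iteration of the main loop
copies 8 bytes, using an ldm or stm on a word-aligned side (in which case
the hard registers r0 and r1 are used) and load/load/store/store pairs of
unaligned accesses otherwise; any trailing 3 bytes are then copied as one
halfword followed by one byte.  */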
15277
15278 static void
15279 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15280 HOST_WIDE_INT length,
15281 unsigned int interleave_factor)
15282 {
15283 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15284 int *regnos = XALLOCAVEC (int, interleave_factor);
15285 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15286 HOST_WIDE_INT i, j;
15287 HOST_WIDE_INT remaining = length, words;
15288 rtx halfword_tmp = NULL, byte_tmp = NULL;
15289 rtx dst, src;
15290 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15291 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15292 HOST_WIDE_INT srcoffset, dstoffset;
15293 HOST_WIDE_INT src_autoinc, dst_autoinc;
15294 rtx mem, addr;
15295
15296 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15297
15298 /* Use hard registers if we have aligned source or destination so we can use
15299 load/store multiple with contiguous registers. */
15300 if (dst_aligned || src_aligned)
15301 for (i = 0; i < interleave_factor; i++)
15302 regs[i] = gen_rtx_REG (SImode, i);
15303 else
15304 for (i = 0; i < interleave_factor; i++)
15305 regs[i] = gen_reg_rtx (SImode);
15306
15307 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15308 src = copy_addr_to_reg (XEXP (srcbase, 0));
15309
15310 srcoffset = dstoffset = 0;
15311
15312 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15313 For copying the last bytes we want to subtract this offset again. */
15314 src_autoinc = dst_autoinc = 0;
15315
15316 for (i = 0; i < interleave_factor; i++)
15317 regnos[i] = i;
15318
15319 /* Copy BLOCK_SIZE_BYTES chunks. */
15320
15321 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15322 {
15323 /* Load words. */
15324 if (src_aligned && interleave_factor > 1)
15325 {
15326 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15327 TRUE, srcbase, &srcoffset));
15328 src_autoinc += UNITS_PER_WORD * interleave_factor;
15329 }
15330 else
15331 {
15332 for (j = 0; j < interleave_factor; j++)
15333 {
15334 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15335 - src_autoinc));
15336 mem = adjust_automodify_address (srcbase, SImode, addr,
15337 srcoffset + j * UNITS_PER_WORD);
15338 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15339 }
15340 srcoffset += block_size_bytes;
15341 }
15342
15343 /* Store words. */
15344 if (dst_aligned && interleave_factor > 1)
15345 {
15346 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15347 TRUE, dstbase, &dstoffset));
15348 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15349 }
15350 else
15351 {
15352 for (j = 0; j < interleave_factor; j++)
15353 {
15354 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15355 - dst_autoinc));
15356 mem = adjust_automodify_address (dstbase, SImode, addr,
15357 dstoffset + j * UNITS_PER_WORD);
15358 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15359 }
15360 dstoffset += block_size_bytes;
15361 }
15362
15363 remaining -= block_size_bytes;
15364 }
15365
15366 /* Copy any whole words left (note these aren't interleaved with any
15367 subsequent halfword/byte load/stores in the interests of simplicity). */
15368
15369 words = remaining / UNITS_PER_WORD;
15370
15371 gcc_assert (words < interleave_factor);
15372
15373 if (src_aligned && words > 1)
15374 {
15375 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15376 &srcoffset));
15377 src_autoinc += UNITS_PER_WORD * words;
15378 }
15379 else
15380 {
15381 for (j = 0; j < words; j++)
15382 {
15383 addr = plus_constant (Pmode, src,
15384 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15385 mem = adjust_automodify_address (srcbase, SImode, addr,
15386 srcoffset + j * UNITS_PER_WORD);
15387 if (src_aligned)
15388 emit_move_insn (regs[j], mem);
15389 else
15390 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15391 }
15392 srcoffset += words * UNITS_PER_WORD;
15393 }
15394
15395 if (dst_aligned && words > 1)
15396 {
15397 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15398 &dstoffset));
15399 dst_autoinc += words * UNITS_PER_WORD;
15400 }
15401 else
15402 {
15403 for (j = 0; j < words; j++)
15404 {
15405 addr = plus_constant (Pmode, dst,
15406 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15407 mem = adjust_automodify_address (dstbase, SImode, addr,
15408 dstoffset + j * UNITS_PER_WORD);
15409 if (dst_aligned)
15410 emit_move_insn (mem, regs[j]);
15411 else
15412 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15413 }
15414 dstoffset += words * UNITS_PER_WORD;
15415 }
15416
15417 remaining -= words * UNITS_PER_WORD;
15418
15419 gcc_assert (remaining < 4);
15420
15421 /* Copy a halfword if necessary. */
15422
15423 if (remaining >= 2)
15424 {
15425 halfword_tmp = gen_reg_rtx (SImode);
15426
15427 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15428 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15429 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15430
15431 /* Either write out immediately, or delay until we've loaded the last
15432 byte, depending on interleave factor. */
15433 if (interleave_factor == 1)
15434 {
15435 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15436 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15437 emit_insn (gen_unaligned_storehi (mem,
15438 gen_lowpart (HImode, halfword_tmp)));
15439 halfword_tmp = NULL;
15440 dstoffset += 2;
15441 }
15442
15443 remaining -= 2;
15444 srcoffset += 2;
15445 }
15446
15447 gcc_assert (remaining < 2);
15448
15449 /* Copy last byte. */
15450
15451 if ((remaining & 1) != 0)
15452 {
15453 byte_tmp = gen_reg_rtx (SImode);
15454
15455 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15456 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15457 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15458
15459 if (interleave_factor == 1)
15460 {
15461 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15462 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15463 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15464 byte_tmp = NULL;
15465 dstoffset++;
15466 }
15467
15468 remaining--;
15469 srcoffset++;
15470 }
15471
15472 /* Store last halfword if we haven't done so already. */
15473
15474 if (halfword_tmp)
15475 {
15476 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15477 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15478 emit_insn (gen_unaligned_storehi (mem,
15479 gen_lowpart (HImode, halfword_tmp)));
15480 dstoffset += 2;
15481 }
15482
15483 /* Likewise for last byte. */
15484
15485 if (byte_tmp)
15486 {
15487 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15488 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15489 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15490 dstoffset++;
15491 }
15492
15493 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15494 }
15495
15496 /* From mips_adjust_block_mem:
15497
15498 Helper function for doing a loop-based block operation on memory
15499 reference MEM. Each iteration of the loop will operate on LENGTH
15500 bytes of MEM.
15501
15502 Create a new base register for use within the loop and point it to
15503 the start of MEM. Create a new memory reference that uses this
15504 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15505
15506 static void
15507 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15508 rtx *loop_mem)
15509 {
15510 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15511
15512 /* Although the new mem does not refer to a known location,
15513 it does keep up to LENGTH bytes of alignment. */
15514 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15515 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15516 }
15517
15518 /* From mips_block_move_loop:
15519
15520 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15521 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15522 the memory regions do not overlap. */
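/* For example, when not optimizing for size arm_cpymemqi_unaligned uses this
with INTERLEAVE_FACTOR == 4 and BYTES_PER_ITER == 16, so a 60-byte copy
becomes a three-iteration loop followed by a straight copy of the
remaining 12 bytes.  */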
15523
15524 static void
15525 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15526 unsigned int interleave_factor,
15527 HOST_WIDE_INT bytes_per_iter)
15528 {
15529 rtx src_reg, dest_reg, final_src, test;
15530 HOST_WIDE_INT leftover;
15531
15532 leftover = length % bytes_per_iter;
15533 length -= leftover;
15534
15535 /* Create registers and memory references for use within the loop. */
15536 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15537 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15538
15539 /* Calculate the value that SRC_REG should have after the last iteration of
15540 the loop. */
15541 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15542 0, 0, OPTAB_WIDEN);
15543
15544 /* Emit the start of the loop. */
15545 rtx_code_label *label = gen_label_rtx ();
15546 emit_label (label);
15547
15548 /* Emit the loop body. */
15549 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15550 interleave_factor);
15551
15552 /* Move on to the next block. */
15553 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15554 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15555
15556 /* Emit the loop condition. */
15557 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15558 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15559
15560 /* Mop up any left-over bytes. */
15561 if (leftover)
15562 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15563 }
15564
15565 /* Emit a block move when either the source or destination is unaligned (not
15566 aligned to a four-byte boundary). This may need further tuning depending on
15567 core type, optimize_size setting, etc. */
15568
15569 static int
15570 arm_cpymemqi_unaligned (rtx *operands)
15571 {
15572 HOST_WIDE_INT length = INTVAL (operands[2]);
15573
15574 if (optimize_size)
15575 {
15576 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15577 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15578 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big, so limit the
15579 amount of generated code when optimizing for size. We'll still use
15580 ldm/stm if src_aligned or dst_aligned, and allow more interleaving in
15581 those cases since the resulting code can be smaller. */
15582 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15583 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15584
15585 if (length > 12)
15586 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15587 interleave_factor, bytes_per_iter);
15588 else
15589 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15590 interleave_factor);
15591 }
15592 else
15593 {
15594 /* Note that the loop created by arm_block_move_unaligned_loop may be
15595 subject to loop unrolling, which makes tuning this condition a little
15596 redundant. */
15597 if (length > 32)
15598 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15599 else
15600 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15601 }
15602
15603 return 1;
15604 }
15605
15606 int
15607 arm_gen_cpymemqi (rtx *operands)
15608 {
15609 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15610 HOST_WIDE_INT srcoffset, dstoffset;
15611 rtx src, dst, srcbase, dstbase;
15612 rtx part_bytes_reg = NULL;
15613 rtx mem;
15614
15615 if (!CONST_INT_P (operands[2])
15616 || !CONST_INT_P (operands[3])
15617 || INTVAL (operands[2]) > 64)
15618 return 0;
15619
15620 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15621 return arm_cpymemqi_unaligned (operands);
15622
15623 if (INTVAL (operands[3]) & 3)
15624 return 0;
15625
15626 dstbase = operands[0];
15627 srcbase = operands[1];
15628
15629 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15630 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15631
15632 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15633 out_words_to_go = INTVAL (operands[2]) / 4;
15634 last_bytes = INTVAL (operands[2]) & 3;
15635 dstoffset = srcoffset = 0;
15636
15637 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15638 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15639
15640 while (in_words_to_go >= 2)
15641 {
15642 if (in_words_to_go > 4)
15643 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15644 TRUE, srcbase, &srcoffset));
15645 else
15646 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15647 src, FALSE, srcbase,
15648 &srcoffset));
15649
15650 if (out_words_to_go)
15651 {
15652 if (out_words_to_go > 4)
15653 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15654 TRUE, dstbase, &dstoffset));
15655 else if (out_words_to_go != 1)
15656 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15657 out_words_to_go, dst,
15658 (last_bytes == 0
15659 ? FALSE : TRUE),
15660 dstbase, &dstoffset));
15661 else
15662 {
15663 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15664 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15665 if (last_bytes != 0)
15666 {
15667 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15668 dstoffset += 4;
15669 }
15670 }
15671 }
15672
15673 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15674 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15675 }
15676
15677 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15678 if (out_words_to_go)
15679 {
15680 rtx sreg;
15681
15682 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15683 sreg = copy_to_reg (mem);
15684
15685 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15686 emit_move_insn (mem, sreg);
15687 in_words_to_go--;
15688
15689 gcc_assert (!in_words_to_go); /* Sanity check */
15690 }
15691
15692 if (in_words_to_go)
15693 {
15694 gcc_assert (in_words_to_go > 0);
15695
15696 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15697 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15698 }
15699
15700 gcc_assert (!last_bytes || part_bytes_reg);
15701
15702 if (BYTES_BIG_ENDIAN && last_bytes)
15703 {
15704 rtx tmp = gen_reg_rtx (SImode);
15705
15706 /* The bytes we want are in the top end of the word. */
15707 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15708 GEN_INT (8 * (4 - last_bytes))));
15709 part_bytes_reg = tmp;
15710
15711 while (last_bytes)
15712 {
15713 mem = adjust_automodify_address (dstbase, QImode,
15714 plus_constant (Pmode, dst,
15715 last_bytes - 1),
15716 dstoffset + last_bytes - 1);
15717 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15718
15719 if (--last_bytes)
15720 {
15721 tmp = gen_reg_rtx (SImode);
15722 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15723 part_bytes_reg = tmp;
15724 }
15725 }
15726
15727 }
15728 else
15729 {
15730 if (last_bytes > 1)
15731 {
15732 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15733 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15734 last_bytes -= 2;
15735 if (last_bytes)
15736 {
15737 rtx tmp = gen_reg_rtx (SImode);
15738 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15739 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15740 part_bytes_reg = tmp;
15741 dstoffset += 2;
15742 }
15743 }
15744
15745 if (last_bytes)
15746 {
15747 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15748 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15749 }
15750 }
15751
15752 return 1;
15753 }
15754
15755 /* Helper for gen_cpymem_ldrd_strd. Return a copy of the memory rtx MEM
15756 with its address increased by the size of its mode. */
15757 inline static rtx
15758 next_consecutive_mem (rtx mem)
15759 {
15760 machine_mode mode = GET_MODE (mem);
15761 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15762 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15763
15764 return adjust_automodify_address (mem, mode, addr, offset);
15765 }
15766
15767 /* Copy using LDRD/STRD instructions whenever possible.
15768 Returns true upon success. */
15769 bool
15770 gen_cpymem_ldrd_strd (rtx *operands)
15771 {
15772 unsigned HOST_WIDE_INT len;
15773 HOST_WIDE_INT align;
15774 rtx src, dst, base;
15775 rtx reg0;
15776 bool src_aligned, dst_aligned;
15777 bool src_volatile, dst_volatile;
15778
15779 gcc_assert (CONST_INT_P (operands[2]));
15780 gcc_assert (CONST_INT_P (operands[3]));
15781
15782 len = UINTVAL (operands[2]);
15783 if (len > 64)
15784 return false;
15785
15786 /* Maximum alignment we can assume for both src and dst buffers. */
15787 align = INTVAL (operands[3]);
15788
15789 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15790 return false;
15791
15792 /* Place src and dst addresses in registers
15793 and update the corresponding mem rtx. */
15794 dst = operands[0];
15795 dst_volatile = MEM_VOLATILE_P (dst);
15796 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15797 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15798 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15799
15800 src = operands[1];
15801 src_volatile = MEM_VOLATILE_P (src);
15802 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15803 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15804 src = adjust_automodify_address (src, VOIDmode, base, 0);
15805
15806 if (!unaligned_access && !(src_aligned && dst_aligned))
15807 return false;
15808
15809 if (src_volatile || dst_volatile)
15810 return false;
15811
15812 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15813 if (!(dst_aligned || src_aligned))
15814 return arm_gen_cpymemqi (operands);
15815
15816 /* If either the src or dst is unaligned, we'll be accessing it as pairs
15817 of unaligned SImode accesses. Otherwise we can generate DImode
15818 ldrd/strd instructions. */
15819 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15820 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15821
15822 while (len >= 8)
15823 {
15824 len -= 8;
15825 reg0 = gen_reg_rtx (DImode);
15826 rtx first_reg = NULL_RTX;
15827 rtx second_reg = NULL_RTX;
15828
15829 if (!src_aligned || !dst_aligned)
15830 {
15831 if (BYTES_BIG_ENDIAN)
15832 {
15833 second_reg = gen_lowpart (SImode, reg0);
15834 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15835 }
15836 else
15837 {
15838 first_reg = gen_lowpart (SImode, reg0);
15839 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15840 }
15841 }
15842 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15843 emit_move_insn (reg0, src);
15844 else if (src_aligned)
15845 emit_insn (gen_unaligned_loaddi (reg0, src));
15846 else
15847 {
15848 emit_insn (gen_unaligned_loadsi (first_reg, src));
15849 src = next_consecutive_mem (src);
15850 emit_insn (gen_unaligned_loadsi (second_reg, src));
15851 }
15852
15853 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15854 emit_move_insn (dst, reg0);
15855 else if (dst_aligned)
15856 emit_insn (gen_unaligned_storedi (dst, reg0));
15857 else
15858 {
15859 emit_insn (gen_unaligned_storesi (dst, first_reg));
15860 dst = next_consecutive_mem (dst);
15861 emit_insn (gen_unaligned_storesi (dst, second_reg));
15862 }
15863
15864 src = next_consecutive_mem (src);
15865 dst = next_consecutive_mem (dst);
15866 }
15867
15868 gcc_assert (len < 8);
15869 if (len >= 4)
15870 {
15871 /* More than a word but less than a double-word to copy. Copy a word. */
15872 reg0 = gen_reg_rtx (SImode);
15873 src = adjust_address (src, SImode, 0);
15874 dst = adjust_address (dst, SImode, 0);
15875 if (src_aligned)
15876 emit_move_insn (reg0, src);
15877 else
15878 emit_insn (gen_unaligned_loadsi (reg0, src));
15879
15880 if (dst_aligned)
15881 emit_move_insn (dst, reg0);
15882 else
15883 emit_insn (gen_unaligned_storesi (dst, reg0));
15884
15885 src = next_consecutive_mem (src);
15886 dst = next_consecutive_mem (dst);
15887 len -= 4;
15888 }
15889
15890 if (len == 0)
15891 return true;
15892
15893 /* Copy the remaining bytes. */
15894 if (len >= 2)
15895 {
15896 dst = adjust_address (dst, HImode, 0);
15897 src = adjust_address (src, HImode, 0);
15898 reg0 = gen_reg_rtx (SImode);
15899 if (src_aligned)
15900 emit_insn (gen_zero_extendhisi2 (reg0, src));
15901 else
15902 emit_insn (gen_unaligned_loadhiu (reg0, src));
15903
15904 if (dst_aligned)
15905 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
15906 else
15907 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15908
15909 src = next_consecutive_mem (src);
15910 dst = next_consecutive_mem (dst);
15911 if (len == 2)
15912 return true;
15913 }
15914
15915 dst = adjust_address (dst, QImode, 0);
15916 src = adjust_address (src, QImode, 0);
15917 reg0 = gen_reg_rtx (QImode);
15918 emit_move_insn (reg0, src);
15919 emit_move_insn (dst, reg0);
15920 return true;
15921 }
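
/* For illustration, assuming both buffers are known to be doubleword
   aligned and 14 bytes are to be copied, the expansion above might emit
   something roughly equivalent to (register numbers are arbitrary):

	ldrd	r4, r5, [r1]		@ 8 bytes via the DImode path
	strd	r4, r5, [r0]
	ldr	r4, [r1, #8]		@ one more word
	str	r4, [r0, #8]
	ldrh	r4, [r1, #12]		@ trailing halfword
	strh	r4, [r0, #12]  */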
15922
15923 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15924 into its component 32-bit subregs. OP2 may be an immediate
15925 constant and we want to simplify it in that case. */
15926 void
15927 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15928 rtx *lo_op2, rtx *hi_op2)
15929 {
15930 *lo_op1 = gen_lowpart (SImode, op1);
15931 *hi_op1 = gen_highpart (SImode, op1);
15932 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15933 subreg_lowpart_offset (SImode, DImode));
15934 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15935 subreg_highpart_offset (SImode, DImode));
15936 }
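
/* For example, if OP2 is (const_int 0x100000002), then *LO_OP2 becomes
   (const_int 2) (the least significant word) and *HI_OP2 becomes
   (const_int 1); a register OP2 is simply split into its two SImode
   subregs, as OP1 always is.  */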
15937
15938 /* Select a dominance comparison mode if possible for a test of the general
15939 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15940 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15941 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15942 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15943 In all cases OP will be either EQ or NE, but we don't need to know which
15944 here. If we are unable to support a dominance comparison we return
15945 CC mode. This will then fail to match for the RTL expressions that
15946 generate this call. */
15947 machine_mode
15948 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15949 {
15950 enum rtx_code cond1, cond2;
15951 int swapped = 0;
15952
15953 /* Currently we will probably get the wrong result if the individual
15954 comparisons are not simple. This also ensures that it is safe to
15955 reverse a comparison if necessary. */
15956 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15957 != CCmode)
15958 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15959 != CCmode))
15960 return CCmode;
15961
15962 /* The if_then_else variant of this tests the second condition if the
15963 first passes, but is true if the first fails. Reverse the first
15964 condition to get a true "inclusive-or" expression. */
15965 if (cond_or == DOM_CC_NX_OR_Y)
15966 cond1 = reverse_condition (cond1);
15967
15968 /* If the comparisons are not equal, and one doesn't dominate the other,
15969 then we can't do this. */
15970 if (cond1 != cond2
15971 && !comparison_dominates_p (cond1, cond2)
15972 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15973 return CCmode;
15974
15975 if (swapped)
15976 std::swap (cond1, cond2);
15977
15978 switch (cond1)
15979 {
15980 case EQ:
15981 if (cond_or == DOM_CC_X_AND_Y)
15982 return CC_DEQmode;
15983
15984 switch (cond2)
15985 {
15986 case EQ: return CC_DEQmode;
15987 case LE: return CC_DLEmode;
15988 case LEU: return CC_DLEUmode;
15989 case GE: return CC_DGEmode;
15990 case GEU: return CC_DGEUmode;
15991 default: gcc_unreachable ();
15992 }
15993
15994 case LT:
15995 if (cond_or == DOM_CC_X_AND_Y)
15996 return CC_DLTmode;
15997
15998 switch (cond2)
15999 {
16000 case LT:
16001 return CC_DLTmode;
16002 case LE:
16003 return CC_DLEmode;
16004 case NE:
16005 return CC_DNEmode;
16006 default:
16007 gcc_unreachable ();
16008 }
16009
16010 case GT:
16011 if (cond_or == DOM_CC_X_AND_Y)
16012 return CC_DGTmode;
16013
16014 switch (cond2)
16015 {
16016 case GT:
16017 return CC_DGTmode;
16018 case GE:
16019 return CC_DGEmode;
16020 case NE:
16021 return CC_DNEmode;
16022 default:
16023 gcc_unreachable ();
16024 }
16025
16026 case LTU:
16027 if (cond_or == DOM_CC_X_AND_Y)
16028 return CC_DLTUmode;
16029
16030 switch (cond2)
16031 {
16032 case LTU:
16033 return CC_DLTUmode;
16034 case LEU:
16035 return CC_DLEUmode;
16036 case NE:
16037 return CC_DNEmode;
16038 default:
16039 gcc_unreachable ();
16040 }
16041
16042 case GTU:
16043 if (cond_or == DOM_CC_X_AND_Y)
16044 return CC_DGTUmode;
16045
16046 switch (cond2)
16047 {
16048 case GTU:
16049 return CC_DGTUmode;
16050 case GEU:
16051 return CC_DGEUmode;
16052 case NE:
16053 return CC_DNEmode;
16054 default:
16055 gcc_unreachable ();
16056 }
16057
16058 /* The remaining cases only occur when both comparisons are the
16059 same. */
16060 case NE:
16061 gcc_assert (cond1 == cond2);
16062 return CC_DNEmode;
16063
16064 case LE:
16065 gcc_assert (cond1 == cond2);
16066 return CC_DLEmode;
16067
16068 case GE:
16069 gcc_assert (cond1 == cond2);
16070 return CC_DGEmode;
16071
16072 case LEU:
16073 gcc_assert (cond1 == cond2);
16074 return CC_DLEUmode;
16075
16076 case GEU:
16077 gcc_assert (cond1 == cond2);
16078 return CC_DGEUmode;
16079
16080 default:
16081 gcc_unreachable ();
16082 }
16083 }
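
/* For illustration: a test of the form ((a < b) || (c <= d)) gives
   cond1 == LT and cond2 == LE; LT dominates LE, so the result is
   CC_DLEmode.  A test of the form ((a == b) && (c == d)) with
   COND_OR == DOM_CC_X_AND_Y yields CC_DEQmode.  */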
16084
16085 machine_mode
16086 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16087 {
16088 /* All floating point compares return CCFP if it is an equality
16089 comparison, and CCFPE otherwise. */
16090 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16091 {
16092 switch (op)
16093 {
16094 case EQ:
16095 case NE:
16096 case UNORDERED:
16097 case ORDERED:
16098 case UNLT:
16099 case UNLE:
16100 case UNGT:
16101 case UNGE:
16102 case UNEQ:
16103 case LTGT:
16104 return CCFPmode;
16105
16106 case LT:
16107 case LE:
16108 case GT:
16109 case GE:
16110 return CCFPEmode;
16111
16112 default:
16113 gcc_unreachable ();
16114 }
16115 }
16116
16117 /* A compare with a shifted operand. Because of canonicalization, the
16118 comparison will have to be swapped when we emit the assembler. */
16119 if (GET_MODE (y) == SImode
16120 && (REG_P (y) || (SUBREG_P (y)))
16121 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16122 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16123 || GET_CODE (x) == ROTATERT))
16124 return CC_SWPmode;
16125
16126 /* A widened compare of the sum of a value plus a carry against a
16127 constant. This is a representation of RSC. We want to swap the
16128 result of the comparison at output. Not valid if the Z bit is
16129 needed. */
16130 if (GET_MODE (x) == DImode
16131 && GET_CODE (x) == PLUS
16132 && arm_borrow_operation (XEXP (x, 1), DImode)
16133 && CONST_INT_P (y)
16134 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16135 && (op == LE || op == GT))
16136 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16137 && (op == LEU || op == GTU))))
16138 return CC_SWPmode;
16139
16140 /* If X is a constant we want to use CC_RSBmode. This is
16141 non-canonical, but arm_gen_compare_reg uses this to generate the
16142 correct canonical form. */
16143 if (GET_MODE (y) == SImode
16144 && (REG_P (y) || SUBREG_P (y))
16145 && CONST_INT_P (x))
16146 return CC_RSBmode;
16147
16148 /* This operation is performed swapped, but since we only rely on the Z
16149 flag we don't need an additional mode. */
16150 if (GET_MODE (y) == SImode
16151 && (REG_P (y) || (SUBREG_P (y)))
16152 && GET_CODE (x) == NEG
16153 && (op == EQ || op == NE))
16154 return CC_Zmode;
16155
16156 /* This is a special case that is used by combine to allow a
16157 comparison of a shifted byte load to be split into a zero-extend
16158 followed by a comparison of the shifted integer (only valid for
16159 equalities and unsigned inequalities). */
16160 if (GET_MODE (x) == SImode
16161 && GET_CODE (x) == ASHIFT
16162 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16163 && GET_CODE (XEXP (x, 0)) == SUBREG
16164 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16165 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16166 && (op == EQ || op == NE
16167 || op == GEU || op == GTU || op == LTU || op == LEU)
16168 && CONST_INT_P (y))
16169 return CC_Zmode;
16170
16171 /* A construct for a conditional compare, if the false arm contains
16172 0, then both conditions must be true, otherwise either condition
16173 must be true. Not all conditions are possible, so CCmode is
16174 returned if it can't be done. */
16175 if (GET_CODE (x) == IF_THEN_ELSE
16176 && (XEXP (x, 2) == const0_rtx
16177 || XEXP (x, 2) == const1_rtx)
16178 && COMPARISON_P (XEXP (x, 0))
16179 && COMPARISON_P (XEXP (x, 1)))
16180 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16181 INTVAL (XEXP (x, 2)));
16182
16183 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16184 if (GET_CODE (x) == AND
16185 && (op == EQ || op == NE)
16186 && COMPARISON_P (XEXP (x, 0))
16187 && COMPARISON_P (XEXP (x, 1)))
16188 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16189 DOM_CC_X_AND_Y);
16190
16191 if (GET_CODE (x) == IOR
16192 && (op == EQ || op == NE)
16193 && COMPARISON_P (XEXP (x, 0))
16194 && COMPARISON_P (XEXP (x, 1)))
16195 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16196 DOM_CC_X_OR_Y);
16197
16198 /* An operation (on Thumb) where we want to test for a single bit.
16199 This is done by shifting that bit up into the top bit of a
16200 scratch register; we can then branch on the sign bit. */
16201 if (TARGET_THUMB1
16202 && GET_MODE (x) == SImode
16203 && (op == EQ || op == NE)
16204 && GET_CODE (x) == ZERO_EXTRACT
16205 && XEXP (x, 1) == const1_rtx)
16206 return CC_Nmode;
16207
16208 /* An operation that sets the condition codes as a side-effect, the
16209 V flag is not set correctly, so we can only use comparisons where
16210 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16211 instead.) */
16212 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16213 if (GET_MODE (x) == SImode
16214 && y == const0_rtx
16215 && (op == EQ || op == NE || op == LT || op == GE)
16216 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16217 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16218 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16219 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16220 || GET_CODE (x) == LSHIFTRT
16221 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16222 || GET_CODE (x) == ROTATERT
16223 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16224 return CC_NZmode;
16225
16226 /* A comparison of ~reg with a const is really a special
16227 canonicalization of compare (~const, reg), which is a reverse
16228 subtract operation. We may not get here if CONST is 0, but that
16229 doesn't matter because ~0 isn't a valid immediate for RSB. */
16230 if (GET_MODE (x) == SImode
16231 && GET_CODE (x) == NOT
16232 && CONST_INT_P (y))
16233 return CC_RSBmode;
16234
16235 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16236 return CC_Zmode;
16237
16238 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16239 && GET_CODE (x) == PLUS
16240 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16241 return CC_Cmode;
16242
16243 if (GET_MODE (x) == DImode
16244 && GET_CODE (x) == PLUS
16245 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16246 && CONST_INT_P (y)
16247 && UINTVAL (y) == 0x800000000
16248 && (op == GEU || op == LTU))
16249 return CC_ADCmode;
16250
16251 if (GET_MODE (x) == DImode
16252 && (op == GE || op == LT)
16253 && GET_CODE (x) == SIGN_EXTEND
16254 && ((GET_CODE (y) == PLUS
16255 && arm_borrow_operation (XEXP (y, 0), DImode))
16256 || arm_borrow_operation (y, DImode)))
16257 return CC_NVmode;
16258
16259 if (GET_MODE (x) == DImode
16260 && (op == GEU || op == LTU)
16261 && GET_CODE (x) == ZERO_EXTEND
16262 && ((GET_CODE (y) == PLUS
16263 && arm_borrow_operation (XEXP (y, 0), DImode))
16264 || arm_borrow_operation (y, DImode)))
16265 return CC_Bmode;
16266
16267 if (GET_MODE (x) == DImode
16268 && (op == EQ || op == NE)
16269 && (GET_CODE (x) == PLUS
16270 || GET_CODE (x) == MINUS)
16271 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16272 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16273 && GET_CODE (y) == SIGN_EXTEND
16274 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16275 return CC_Vmode;
16276
16277 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16278 return GET_MODE (x);
16279
16280 return CCmode;
16281 }
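
/* For illustration: a comparison such as
     (compare (ashift (reg:SI r1) (const_int 2)) (reg:SI r0))
   selects CC_SWPmode, since the operands must be swapped when the
   assembler output is generated, while
     (compare (neg:SI (reg:SI r1)) (reg:SI r0))
   under an EQ or NE test selects CC_Zmode.  */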
16282
16283 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16284 the sequence of instructions needed to generate a suitable condition
16285 code register. Return the CC register result. */
16286 static rtx
16287 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16288 {
16289 machine_mode mode;
16290 rtx cc_reg;
16291
16292 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16293 gcc_assert (TARGET_32BIT);
16294 gcc_assert (!CONST_INT_P (x));
16295
16296 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16297 subreg_lowpart_offset (SImode, DImode));
16298 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16299 subreg_highpart_offset (SImode, DImode));
16300 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16301 subreg_lowpart_offset (SImode, DImode));
16302 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16303 subreg_highpart_offset (SImode, DImode));
16304 switch (code)
16305 {
16306 case EQ:
16307 case NE:
16308 {
16309 if (y_lo == const0_rtx || y_hi == const0_rtx)
16310 {
16311 if (y_lo != const0_rtx)
16312 {
16313 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16314
16315 gcc_assert (y_hi == const0_rtx);
16316 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16317 if (!arm_add_operand (y_lo, SImode))
16318 y_lo = force_reg (SImode, y_lo);
16319 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16320 x_lo = scratch2;
16321 }
16322 else if (y_hi != const0_rtx)
16323 {
16324 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16325
16326 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16327 if (!arm_add_operand (y_hi, SImode))
16328 y_hi = force_reg (SImode, y_hi);
16329 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16330 x_hi = scratch2;
16331 }
16332
16333 if (!scratch)
16334 {
16335 gcc_assert (!reload_completed);
16336 scratch = gen_rtx_SCRATCH (SImode);
16337 }
16338
16339 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16340 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16341
16342 rtx set
16343 = gen_rtx_SET (cc_reg,
16344 gen_rtx_COMPARE (CC_NZmode,
16345 gen_rtx_IOR (SImode, x_lo, x_hi),
16346 const0_rtx));
16347 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16348 clobber)));
16349 return cc_reg;
16350 }
16351
16352 if (!arm_add_operand (y_lo, SImode))
16353 y_lo = force_reg (SImode, y_lo);
16354
16355 if (!arm_add_operand (y_hi, SImode))
16356 y_hi = force_reg (SImode, y_hi);
16357
16358 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16359 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16360 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16361 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16362 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16363
16364 emit_insn (gen_rtx_SET (cc_reg,
16365 gen_rtx_COMPARE (mode, conjunction,
16366 const0_rtx)));
16367 return cc_reg;
16368 }
16369
16370 case LT:
16371 case GE:
16372 {
16373 if (y_lo == const0_rtx)
16374 {
16375 /* If the low word of y is 0, then this is simply a normal
16376 compare of the upper words. */
16377 if (!arm_add_operand (y_hi, SImode))
16378 y_hi = force_reg (SImode, y_hi);
16379
16380 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16381 }
16382
16383 if (!arm_add_operand (y_lo, SImode))
16384 y_lo = force_reg (SImode, y_lo);
16385
16386 rtx cmp1
16387 = gen_rtx_LTU (DImode,
16388 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16389 const0_rtx);
16390
16391 if (!scratch)
16392 scratch = gen_rtx_SCRATCH (SImode);
16393
16394 if (!arm_not_operand (y_hi, SImode))
16395 y_hi = force_reg (SImode, y_hi);
16396
16397 rtx_insn *insn;
16398 if (y_hi == const0_rtx)
16399 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16400 cmp1));
16401 else if (CONST_INT_P (y_hi))
16402 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16403 y_hi, cmp1));
16404 else
16405 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16406 cmp1));
16407 return SET_DEST (single_set (insn));
16408 }
16409
16410 case LE:
16411 case GT:
16412 {
16413 /* During expansion, we only expect to get here if y is a
16414 constant that we want to handle; otherwise we should have
16415 swapped the operands already. */
16416 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16417
16418 if (!const_ok_for_arm (INTVAL (y_lo)))
16419 y_lo = force_reg (SImode, y_lo);
16420
16421 /* Perform a reverse subtract and compare. */
16422 rtx cmp1
16423 = gen_rtx_LTU (DImode,
16424 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16425 const0_rtx);
16426 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16427 x_hi, cmp1));
16428 return SET_DEST (single_set (insn));
16429 }
16430
16431 case LTU:
16432 case GEU:
16433 {
16434 if (y_lo == const0_rtx)
16435 {
16436 /* If the low word of y is 0, then this is simply a normal
16437 compare of the upper words. */
16438 if (!arm_add_operand (y_hi, SImode))
16439 y_hi = force_reg (SImode, y_hi);
16440
16441 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16442 }
16443
16444 if (!arm_add_operand (y_lo, SImode))
16445 y_lo = force_reg (SImode, y_lo);
16446
16447 rtx cmp1
16448 = gen_rtx_LTU (DImode,
16449 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16450 const0_rtx);
16451
16452 if (!scratch)
16453 scratch = gen_rtx_SCRATCH (SImode);
16454 if (!arm_not_operand (y_hi, SImode))
16455 y_hi = force_reg (SImode, y_hi);
16456
16457 rtx_insn *insn;
16458 if (y_hi == const0_rtx)
16459 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16460 cmp1));
16461 else if (CONST_INT_P (y_hi))
16462 {
16463 /* Constant is viewed as unsigned when zero-extended. */
16464 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16465 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16466 y_hi, cmp1));
16467 }
16468 else
16469 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16470 cmp1));
16471 return SET_DEST (single_set (insn));
16472 }
16473
16474 case LEU:
16475 case GTU:
16476 {
16477 /* During expansion, we only expect to get here if y is a
16478 constant that we want to handle; otherwise we should have
16479 swapped the operands already. */
16480 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16481
16482 if (!const_ok_for_arm (INTVAL (y_lo)))
16483 y_lo = force_reg (SImode, y_lo);
16484
16485 /* Perform a reverse subtract and compare. */
16486 rtx cmp1
16487 = gen_rtx_LTU (DImode,
16488 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16489 const0_rtx);
16490 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16491 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16492 x_hi, cmp1));
16493 return SET_DEST (single_set (insn));
16494 }
16495
16496 default:
16497 gcc_unreachable ();
16498 }
16499 }
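
/* For example, when Y is the constant zero the EQ/NE case above emits a
   single comparison of the form
     (parallel [(set (reg:CC_NZ CC_REGNUM)
		     (compare:CC_NZ (ior:SI x_lo x_hi) (const_int 0)))
		(clobber (scratch:SI))])
   which is normally matched by a single ORR-and-compare pattern.  */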
16500
16501 /* X and Y are two things to compare using CODE. Emit the compare insn and
16502 return the rtx for register 0 in the proper mode. */
16503 rtx
16504 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16505 {
16506 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16507 return arm_gen_dicompare_reg (code, x, y, scratch);
16508
16509 machine_mode mode = SELECT_CC_MODE (code, x, y);
16510 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16511 if (mode == CC_RSBmode)
16512 {
16513 if (!scratch)
16514 scratch = gen_rtx_SCRATCH (SImode);
16515 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16516 GEN_INT (~UINTVAL (x)), y));
16517 }
16518 else
16519 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16520
16521 return cc_reg;
16522 }
16523
16524 /* Generate a sequence of insns that will generate the correct return
16525 address mask depending on the physical architecture that the program
16526 is running on. */
16527 rtx
16528 arm_gen_return_addr_mask (void)
16529 {
16530 rtx reg = gen_reg_rtx (Pmode);
16531
16532 emit_insn (gen_return_addr_mask (reg));
16533 return reg;
16534 }
16535
16536 void
16537 arm_reload_in_hi (rtx *operands)
16538 {
16539 rtx ref = operands[1];
16540 rtx base, scratch;
16541 HOST_WIDE_INT offset = 0;
16542
16543 if (SUBREG_P (ref))
16544 {
16545 offset = SUBREG_BYTE (ref);
16546 ref = SUBREG_REG (ref);
16547 }
16548
16549 if (REG_P (ref))
16550 {
16551 /* We have a pseudo which has been spilt onto the stack; there
16552 are two cases here: the first where there is a simple
16553 stack-slot replacement and a second where the stack-slot is
16554 out of range, or is used as a subreg. */
16555 if (reg_equiv_mem (REGNO (ref)))
16556 {
16557 ref = reg_equiv_mem (REGNO (ref));
16558 base = find_replacement (&XEXP (ref, 0));
16559 }
16560 else
16561 /* The slot is out of range, or was dressed up in a SUBREG. */
16562 base = reg_equiv_address (REGNO (ref));
16563
16564 /* PR 62554: If there is no equivalent memory location then just move
16565 the value as an SImode register move. This happens when the target
16566 architecture variant does not have an HImode register move. */
16567 if (base == NULL)
16568 {
16569 gcc_assert (REG_P (operands[0]));
16570 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16571 gen_rtx_SUBREG (SImode, ref, 0)));
16572 return;
16573 }
16574 }
16575 else
16576 base = find_replacement (&XEXP (ref, 0));
16577
16578 /* Handle the case where the address is too complex to be offset by 1. */
16579 if (GET_CODE (base) == MINUS
16580 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16581 {
16582 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16583
16584 emit_set_insn (base_plus, base);
16585 base = base_plus;
16586 }
16587 else if (GET_CODE (base) == PLUS)
16588 {
16589 /* The addend must be CONST_INT, or we would have dealt with it above. */
16590 HOST_WIDE_INT hi, lo;
16591
16592 offset += INTVAL (XEXP (base, 1));
16593 base = XEXP (base, 0);
16594
16595 /* Rework the address into a legal sequence of insns. */
16596 /* Valid range for lo is -4095 -> 4095 */
16597 lo = (offset >= 0
16598 ? (offset & 0xfff)
16599 : -((-offset) & 0xfff));
16600
16601 /* Corner case: if lo is the max offset, then we would be out of range
16602 once we have added the additional 1 below, so bump the msb into the
16603 pre-loading insn(s). */
16604 if (lo == 4095)
16605 lo &= 0x7ff;
16606
16607 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16608 ^ (HOST_WIDE_INT) 0x80000000)
16609 - (HOST_WIDE_INT) 0x80000000);
16610
16611 gcc_assert (hi + lo == offset);
16612
16613 if (hi != 0)
16614 {
16615 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16616
16617 /* Get the base address; addsi3 knows how to handle constants
16618 that require more than one insn. */
16619 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16620 base = base_plus;
16621 offset = lo;
16622 }
16623 }
16624
16625 /* Operands[2] may overlap operands[0] (though it won't overlap
16626 operands[1]); that's why we asked for a DImode reg -- so we can
16627 use the half that does not overlap. */
16628 if (REGNO (operands[2]) == REGNO (operands[0]))
16629 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16630 else
16631 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16632
16633 emit_insn (gen_zero_extendqisi2 (scratch,
16634 gen_rtx_MEM (QImode,
16635 plus_constant (Pmode, base,
16636 offset))));
16637 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16638 gen_rtx_MEM (QImode,
16639 plus_constant (Pmode, base,
16640 offset + 1))));
16641 if (!BYTES_BIG_ENDIAN)
16642 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16643 gen_rtx_IOR (SImode,
16644 gen_rtx_ASHIFT
16645 (SImode,
16646 gen_rtx_SUBREG (SImode, operands[0], 0),
16647 GEN_INT (8)),
16648 scratch));
16649 else
16650 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16651 gen_rtx_IOR (SImode,
16652 gen_rtx_ASHIFT (SImode, scratch,
16653 GEN_INT (8)),
16654 gen_rtx_SUBREG (SImode, operands[0], 0)));
16655 }
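
/* For illustration, on a little-endian target the function above
   synthesizes the halfword load roughly as (register names are
   illustrative, OFF is the resolved offset):

	ldrb	rS, [rB, #OFF]		@ low byte into the scratch
	ldrb	rD, [rB, #OFF+1]	@ high byte into the destination
	orr	rD, rS, rD, lsl #8  */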
16656
16657 /* Handle storing a half-word to memory during reload by synthesizing as two
16658 byte stores. Take care not to clobber the input values until after we
16659 have moved them somewhere safe. This code assumes that if the DImode
16660 scratch in operands[2] overlaps either the input value or output address
16661 in some way, then that value must die in this insn (we absolutely need
16662 two scratch registers for some corner cases). */
16663 void
16664 arm_reload_out_hi (rtx *operands)
16665 {
16666 rtx ref = operands[0];
16667 rtx outval = operands[1];
16668 rtx base, scratch;
16669 HOST_WIDE_INT offset = 0;
16670
16671 if (SUBREG_P (ref))
16672 {
16673 offset = SUBREG_BYTE (ref);
16674 ref = SUBREG_REG (ref);
16675 }
16676
16677 if (REG_P (ref))
16678 {
16679 /* We have a pseudo which has been spilt onto the stack; there
16680 are two cases here: the first where there is a simple
16681 stack-slot replacement and a second where the stack-slot is
16682 out of range, or is used as a subreg. */
16683 if (reg_equiv_mem (REGNO (ref)))
16684 {
16685 ref = reg_equiv_mem (REGNO (ref));
16686 base = find_replacement (&XEXP (ref, 0));
16687 }
16688 else
16689 /* The slot is out of range, or was dressed up in a SUBREG. */
16690 base = reg_equiv_address (REGNO (ref));
16691
16692 /* PR 62254: If there is no equivalent memory location then just move
16693 the value as an SImode register move. This happens when the target
16694 architecture variant does not have an HImode register move. */
16695 if (base == NULL)
16696 {
16697 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16698
16699 if (REG_P (outval))
16700 {
16701 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16702 gen_rtx_SUBREG (SImode, outval, 0)));
16703 }
16704 else /* SUBREG_P (outval) */
16705 {
16706 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16707 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16708 SUBREG_REG (outval)));
16709 else
16710 /* FIXME: Handle other cases ? */
16711 gcc_unreachable ();
16712 }
16713 return;
16714 }
16715 }
16716 else
16717 base = find_replacement (&XEXP (ref, 0));
16718
16719 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16720
16721 /* Handle the case where the address is too complex to be offset by 1. */
16722 if (GET_CODE (base) == MINUS
16723 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16724 {
16725 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16726
16727 /* Be careful not to destroy OUTVAL. */
16728 if (reg_overlap_mentioned_p (base_plus, outval))
16729 {
16730 /* Updating base_plus might destroy outval, see if we can
16731 swap the scratch and base_plus. */
16732 if (!reg_overlap_mentioned_p (scratch, outval))
16733 std::swap (scratch, base_plus);
16734 else
16735 {
16736 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16737
16738 /* Be conservative and copy OUTVAL into the scratch now,
16739 this should only be necessary if outval is a subreg
16740 of something larger than a word. */
16741 /* XXX Might this clobber base? I can't see how it can,
16742 since scratch is known to overlap with OUTVAL, and
16743 must be wider than a word. */
16744 emit_insn (gen_movhi (scratch_hi, outval));
16745 outval = scratch_hi;
16746 }
16747 }
16748
16749 emit_set_insn (base_plus, base);
16750 base = base_plus;
16751 }
16752 else if (GET_CODE (base) == PLUS)
16753 {
16754 /* The addend must be CONST_INT, or we would have dealt with it above. */
16755 HOST_WIDE_INT hi, lo;
16756
16757 offset += INTVAL (XEXP (base, 1));
16758 base = XEXP (base, 0);
16759
16760 /* Rework the address into a legal sequence of insns. */
16761 /* Valid range for lo is -4095 -> 4095 */
16762 lo = (offset >= 0
16763 ? (offset & 0xfff)
16764 : -((-offset) & 0xfff));
16765
16766 /* Corner case: if lo is the max offset, then we would be out of range
16767 once we have added the additional 1 below, so bump the msb into the
16768 pre-loading insn(s). */
16769 if (lo == 4095)
16770 lo &= 0x7ff;
16771
16772 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16773 ^ (HOST_WIDE_INT) 0x80000000)
16774 - (HOST_WIDE_INT) 0x80000000);
16775
16776 gcc_assert (hi + lo == offset);
16777
16778 if (hi != 0)
16779 {
16780 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16781
16782 /* Be careful not to destroy OUTVAL. */
16783 if (reg_overlap_mentioned_p (base_plus, outval))
16784 {
16785 /* Updating base_plus might destroy outval, see if we
16786 can swap the scratch and base_plus. */
16787 if (!reg_overlap_mentioned_p (scratch, outval))
16788 std::swap (scratch, base_plus);
16789 else
16790 {
16791 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16792
16793 /* Be conservative and copy outval into scratch now,
16794 this should only be necessary if outval is a
16795 subreg of something larger than a word. */
16796 /* XXX Might this clobber base? I can't see how it
16797 can, since scratch is known to overlap with
16798 outval. */
16799 emit_insn (gen_movhi (scratch_hi, outval));
16800 outval = scratch_hi;
16801 }
16802 }
16803
16804 /* Get the base address; addsi3 knows how to handle constants
16805 that require more than one insn. */
16806 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16807 base = base_plus;
16808 offset = lo;
16809 }
16810 }
16811
16812 if (BYTES_BIG_ENDIAN)
16813 {
16814 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16815 plus_constant (Pmode, base,
16816 offset + 1)),
16817 gen_lowpart (QImode, outval)));
16818 emit_insn (gen_lshrsi3 (scratch,
16819 gen_rtx_SUBREG (SImode, outval, 0),
16820 GEN_INT (8)));
16821 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16822 offset)),
16823 gen_lowpart (QImode, scratch)));
16824 }
16825 else
16826 {
16827 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16828 offset)),
16829 gen_lowpart (QImode, outval)));
16830 emit_insn (gen_lshrsi3 (scratch,
16831 gen_rtx_SUBREG (SImode, outval, 0),
16832 GEN_INT (8)));
16833 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16834 plus_constant (Pmode, base,
16835 offset + 1)),
16836 gen_lowpart (QImode, scratch)));
16837 }
16838 }
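
/* For illustration, on a little-endian target the function above
   synthesizes the halfword store roughly as (register names are
   illustrative, OFF is the resolved offset):

	strb	rV, [rB, #OFF]		@ low byte of the value
	mov	rS, rV, lsr #8		@ move the high byte into the scratch
	strb	rS, [rB, #OFF+1]  */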
16839
16840 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16841 (padded to the size of a word) should be passed in a register. */
16842
16843 static bool
16844 arm_must_pass_in_stack (const function_arg_info &arg)
16845 {
16846 if (TARGET_AAPCS_BASED)
16847 return must_pass_in_stack_var_size (arg);
16848 else
16849 return must_pass_in_stack_var_size_or_pad (arg);
16850 }
16851
16852
16853 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16854 byte of a stack argument has useful data. For legacy APCS ABIs we use
16855 the default. For AAPCS based ABIs small aggregate types are placed
16856 in the lowest memory address. */
16857
16858 static pad_direction
16859 arm_function_arg_padding (machine_mode mode, const_tree type)
16860 {
16861 if (!TARGET_AAPCS_BASED)
16862 return default_function_arg_padding (mode, type);
16863
16864 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16865 return PAD_DOWNWARD;
16866
16867 return PAD_UPWARD;
16868 }
16869
16870
16871 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16872 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16873 register has useful data, and return the opposite if the most
16874 significant byte does. */
16875
16876 bool
16877 arm_pad_reg_upward (machine_mode mode,
16878 tree type, int first ATTRIBUTE_UNUSED)
16879 {
16880 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16881 {
16882 /* For AAPCS, small aggregates, small fixed-point types,
16883 and small complex types are always padded upwards. */
16884 if (type)
16885 {
16886 if ((AGGREGATE_TYPE_P (type)
16887 || TREE_CODE (type) == COMPLEX_TYPE
16888 || FIXED_POINT_TYPE_P (type))
16889 && int_size_in_bytes (type) <= 4)
16890 return true;
16891 }
16892 else
16893 {
16894 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16895 && GET_MODE_SIZE (mode) <= 4)
16896 return true;
16897 }
16898 }
16899
16900 /* Otherwise, use default padding. */
16901 return !BYTES_BIG_ENDIAN;
16902 }
16903
16904 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16905 assuming that the address in the base register is word aligned. */
16906 bool
16907 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16908 {
16909 HOST_WIDE_INT max_offset;
16910
16911 /* Offset must be a multiple of 4 in Thumb mode. */
16912 if (TARGET_THUMB2 && ((offset & 3) != 0))
16913 return false;
16914
16915 if (TARGET_THUMB2)
16916 max_offset = 1020;
16917 else if (TARGET_ARM)
16918 max_offset = 255;
16919 else
16920 return false;
16921
16922 return ((offset <= max_offset) && (offset >= -max_offset));
16923 }
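
/* For example, in Thumb-2 state an offset of 1020 is accepted, 1022 is
   rejected (not a multiple of 4) and 1024 is rejected (out of range);
   in ARM state any offset in the range [-255, 255] passes this check.  */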
16924
16925 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16926 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16927 Assumes that the address in the base register RN is word aligned. Pattern
16928 guarantees that both memory accesses use the same base register,
16929 the offsets are constants within the range, and the gap between the offsets is 4.
16930 If reload has completed, also check that the registers are legal. WBACK indicates
16931 whether the address is updated. LOAD indicates whether the memory access is a load or a store. */
16932 bool
16933 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16934 bool wback, bool load)
16935 {
16936 unsigned int t, t2, n;
16937
16938 if (!reload_completed)
16939 return true;
16940
16941 if (!offset_ok_for_ldrd_strd (offset))
16942 return false;
16943
16944 t = REGNO (rt);
16945 t2 = REGNO (rt2);
16946 n = REGNO (rn);
16947
16948 if ((TARGET_THUMB2)
16949 && ((wback && (n == t || n == t2))
16950 || (t == SP_REGNUM)
16951 || (t == PC_REGNUM)
16952 || (t2 == SP_REGNUM)
16953 || (t2 == PC_REGNUM)
16954 || (!load && (n == PC_REGNUM))
16955 || (load && (t == t2))
16956 /* Triggers Cortex-M3 LDRD errata. */
16957 || (!wback && load && fix_cm3_ldrd && (n == t))))
16958 return false;
16959
16960 if ((TARGET_ARM)
16961 && ((wback && (n == t || n == t2))
16962 || (t2 == PC_REGNUM)
16963 || (t % 2 != 0) /* First destination register is not even. */
16964 || (t2 != t + 1)
16965 /* PC can be used as base register (for offset addressing only),
16966 but it is deprecated. */
16967 || (n == PC_REGNUM)))
16968 return false;
16969
16970 return true;
16971 }
16972
16973 /* Return true if a 64-bit access with alignment ALIGN and with a
16974 constant offset OFFSET from the base pointer is permitted on this
16975 architecture. */
16976 static bool
16977 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16978 {
16979 return (unaligned_access
16980 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16981 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16982 }
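
/* For example, if unaligned accesses are permitted then word alignment
   with a word-multiple offset suffices (align == 32, offset == 4 is OK);
   otherwise doubleword alignment and a doubleword-multiple offset are
   required, so the same case is rejected.  */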
16983
16984 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16985 operand MEM's address contains an immediate offset from the base
16986 register and has no side effects, in which case it sets BASE,
16987 OFFSET and ALIGN accordingly. */
16988 static bool
16989 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16990 {
16991 rtx addr;
16992
16993 gcc_assert (base != NULL && offset != NULL);
16994
16995 /* TODO: Handle more general memory operand patterns, such as
16996 PRE_DEC and PRE_INC. */
16997
16998 if (side_effects_p (mem))
16999 return false;
17000
17001 /* Can't deal with subregs. */
17002 if (SUBREG_P (mem))
17003 return false;
17004
17005 gcc_assert (MEM_P (mem));
17006
17007 *offset = const0_rtx;
17008 *align = MEM_ALIGN (mem);
17009
17010 addr = XEXP (mem, 0);
17011
17012 /* If addr isn't valid for DImode, then we can't handle it. */
17013 if (!arm_legitimate_address_p (DImode, addr,
17014 reload_in_progress || reload_completed))
17015 return false;
17016
17017 if (REG_P (addr))
17018 {
17019 *base = addr;
17020 return true;
17021 }
17022 else if (GET_CODE (addr) == PLUS)
17023 {
17024 *base = XEXP (addr, 0);
17025 *offset = XEXP (addr, 1);
17026 return (REG_P (*base) && CONST_INT_P (*offset));
17027 }
17028
17029 return false;
17030 }
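
/* For example, (mem:SI (plus:SI (reg:SI r4) (const_int 8))) yields
   *BASE == (reg:SI r4) and *OFFSET == (const_int 8), assuming the
   address is legitimate for DImode; an auto-increment address such as
   (post_inc ...) is rejected by the side_effects_p check above.  */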
17031
17032 /* Called from a peephole2 to replace two word-size accesses with a
17033 single LDRD/STRD instruction. Returns true iff we can generate a
17034 new instruction sequence. That is, both accesses use the same base
17035 register and the gap between constant offsets is 4. This function
17036 may reorder its operands to match ldrd/strd RTL templates.
17037 OPERANDS are the operands found by the peephole matcher;
17038 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17039 corresponding memory operands. LOAD indicates whether the access
17040 is a load or a store. CONST_STORE indicates a store of constant
17041 integer values held in OPERANDS[4,5], and assumes that the pattern
17042 is four insns long, for the purpose of checking dead registers.
17043 COMMUTE indicates that register operands may be reordered. */
17044 bool
17045 gen_operands_ldrd_strd (rtx *operands, bool load,
17046 bool const_store, bool commute)
17047 {
17048 int nops = 2;
17049 HOST_WIDE_INT offsets[2], offset, align[2];
17050 rtx base = NULL_RTX;
17051 rtx cur_base, cur_offset, tmp;
17052 int i, gap;
17053 HARD_REG_SET regset;
17054
17055 gcc_assert (!const_store || !load);
17056 /* Check that the memory references are immediate offsets from the
17057 same base register. Extract the base register, the destination
17058 registers, and the corresponding memory offsets. */
17059 for (i = 0; i < nops; i++)
17060 {
17061 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17062 &align[i]))
17063 return false;
17064
17065 if (i == 0)
17066 base = cur_base;
17067 else if (REGNO (base) != REGNO (cur_base))
17068 return false;
17069
17070 offsets[i] = INTVAL (cur_offset);
17071 if (GET_CODE (operands[i]) == SUBREG)
17072 {
17073 tmp = SUBREG_REG (operands[i]);
17074 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17075 operands[i] = tmp;
17076 }
17077 }
17078
17079 /* Make sure there is no dependency between the individual loads. */
17080 if (load && REGNO (operands[0]) == REGNO (base))
17081 return false; /* RAW */
17082
17083 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17084 return false; /* WAW */
17085
17086 /* If the same input register is used in both stores
17087 when storing different constants, try to find a free register.
17088 For example, the code
17089 mov r0, 0
17090 str r0, [r2]
17091 mov r0, 1
17092 str r0, [r2, #4]
17093 can be transformed into
17094 mov r1, 0
17095 mov r0, 1
17096 strd r1, r0, [r2]
17097 in Thumb mode assuming that r1 is free.
17098 For ARM mode do the same but only if the starting register
17099 can be made to be even. */
17100 if (const_store
17101 && REGNO (operands[0]) == REGNO (operands[1])
17102 && INTVAL (operands[4]) != INTVAL (operands[5]))
17103 {
17104 if (TARGET_THUMB2)
17105 {
17106 CLEAR_HARD_REG_SET (regset);
17107 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17108 if (tmp == NULL_RTX)
17109 return false;
17110
17111 /* Use the new register in the first load to ensure that
17112 if the original input register is not dead after peephole,
17113 then it will have the correct constant value. */
17114 operands[0] = tmp;
17115 }
17116 else if (TARGET_ARM)
17117 {
17118 int regno = REGNO (operands[0]);
17119 if (!peep2_reg_dead_p (4, operands[0]))
17120 {
17121 /* When the input register is even and is not dead after the
17122 pattern, it has to hold the second constant but we cannot
17123 form a legal STRD in ARM mode with this register as the second
17124 register. */
17125 if (regno % 2 == 0)
17126 return false;
17127
17128 /* Is regno-1 free? */
17129 SET_HARD_REG_SET (regset);
17130 CLEAR_HARD_REG_BIT (regset, regno - 1);
17131 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17132 if (tmp == NULL_RTX)
17133 return false;
17134
17135 operands[0] = tmp;
17136 }
17137 else
17138 {
17139 /* Find a DImode register. */
17140 CLEAR_HARD_REG_SET (regset);
17141 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17142 if (tmp != NULL_RTX)
17143 {
17144 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17145 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17146 }
17147 else
17148 {
17149 /* Can we use the input register to form a DI register? */
17150 SET_HARD_REG_SET (regset);
17151 CLEAR_HARD_REG_BIT (regset,
17152 regno % 2 == 0 ? regno + 1 : regno - 1);
17153 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17154 if (tmp == NULL_RTX)
17155 return false;
17156 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17157 }
17158 }
17159
17160 gcc_assert (operands[0] != NULL_RTX);
17161 gcc_assert (operands[1] != NULL_RTX);
17162 gcc_assert (REGNO (operands[0]) % 2 == 0);
17163 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17164 }
17165 }
17166
17167 /* Make sure the instructions are ordered with lower memory access first. */
17168 if (offsets[0] > offsets[1])
17169 {
17170 gap = offsets[0] - offsets[1];
17171 offset = offsets[1];
17172
17173 /* Swap the instructions such that lower memory is accessed first. */
17174 std::swap (operands[0], operands[1]);
17175 std::swap (operands[2], operands[3]);
17176 std::swap (align[0], align[1]);
17177 if (const_store)
17178 std::swap (operands[4], operands[5]);
17179 }
17180 else
17181 {
17182 gap = offsets[1] - offsets[0];
17183 offset = offsets[0];
17184 }
17185
17186 /* Make sure accesses are to consecutive memory locations. */
17187 if (gap != GET_MODE_SIZE (SImode))
17188 return false;
17189
17190 if (!align_ok_ldrd_strd (align[0], offset))
17191 return false;
17192
17193 /* Make sure we generate legal instructions. */
17194 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17195 false, load))
17196 return true;
17197
17198 /* In Thumb state, where the registers are almost unconstrained, there
17199 is little hope of fixing it. */
17200 if (TARGET_THUMB2)
17201 return false;
17202
17203 if (load && commute)
17204 {
17205 /* Try reordering registers. */
17206 std::swap (operands[0], operands[1]);
17207 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17208 false, load))
17209 return true;
17210 }
17211
17212 if (const_store)
17213 {
17214 /* If input registers are dead after this pattern, they can be
17215 reordered or replaced by other registers that are free in the
17216 current pattern. */
17217 if (!peep2_reg_dead_p (4, operands[0])
17218 || !peep2_reg_dead_p (4, operands[1]))
17219 return false;
17220
17221 /* Try to reorder the input registers. */
17222 /* For example, the code
17223 mov r0, 0
17224 mov r1, 1
17225 str r1, [r2]
17226 str r0, [r2, #4]
17227 can be transformed into
17228 mov r1, 0
17229 mov r0, 1
17230 strd r0, r1, [r2]
17231 */
17232 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17233 false, false))
17234 {
17235 std::swap (operands[0], operands[1]);
17236 return true;
17237 }
17238
17239 /* Try to find a free DI register. */
17240 CLEAR_HARD_REG_SET (regset);
17241 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17242 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17243 while (true)
17244 {
17245 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17246 if (tmp == NULL_RTX)
17247 return false;
17248
17249 /* DREG must be an even-numbered register in DImode.
17250 Split it into SI registers. */
17251 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17252 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17253 gcc_assert (operands[0] != NULL_RTX);
17254 gcc_assert (operands[1] != NULL_RTX);
17255 gcc_assert (REGNO (operands[0]) % 2 == 0);
17256 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17257
17258 return (operands_ok_ldrd_strd (operands[0], operands[1],
17259 base, offset,
17260 false, load));
17261 }
17262 }
17263
17264 return false;
17265 }
17266
17267
17268 /* Return true if parallel execution of the two word-size accesses provided
17269 could be satisfied with a single LDRD/STRD instruction. Two word-size
17270 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17271 register operands and OPERANDS[2,3] are the corresponding memory operands.
17272 */
17273 bool
17274 valid_operands_ldrd_strd (rtx *operands, bool load)
17275 {
17276 int nops = 2;
17277 HOST_WIDE_INT offsets[2], offset, align[2];
17278 rtx base = NULL_RTX;
17279 rtx cur_base, cur_offset;
17280 int i, gap;
17281
17282 /* Check that the memory references are immediate offsets from the
17283 same base register. Extract the base register, the destination
17284 registers, and the corresponding memory offsets. */
17285 for (i = 0; i < nops; i++)
17286 {
17287 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17288 &align[i]))
17289 return false;
17290
17291 if (i == 0)
17292 base = cur_base;
17293 else if (REGNO (base) != REGNO (cur_base))
17294 return false;
17295
17296 offsets[i] = INTVAL (cur_offset);
17297 if (GET_CODE (operands[i]) == SUBREG)
17298 return false;
17299 }
17300
17301 if (offsets[0] > offsets[1])
17302 return false;
17303
17304 gap = offsets[1] - offsets[0];
17305 offset = offsets[0];
17306
17307 /* Make sure accesses are to consecutive memory locations. */
17308 if (gap != GET_MODE_SIZE (SImode))
17309 return false;
17310
17311 if (!align_ok_ldrd_strd (align[0], offset))
17312 return false;
17313
17314 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17315 false, load);
17316 }
17317
17318 \f
17319 /* Print a symbolic form of X to the debug file, F. */
17320 static void
17321 arm_print_value (FILE *f, rtx x)
17322 {
17323 switch (GET_CODE (x))
17324 {
17325 case CONST_INT:
17326 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17327 return;
17328
17329 case CONST_DOUBLE:
17330 {
17331 char fpstr[20];
17332 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17333 sizeof (fpstr), 0, 1);
17334 fputs (fpstr, f);
17335 }
17336 return;
17337
17338 case CONST_VECTOR:
17339 {
17340 int i;
17341
17342 fprintf (f, "<");
17343 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17344 {
17345 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17346 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17347 fputc (',', f);
17348 }
17349 fprintf (f, ">");
17350 }
17351 return;
17352
17353 case CONST_STRING:
17354 fprintf (f, "\"%s\"", XSTR (x, 0));
17355 return;
17356
17357 case SYMBOL_REF:
17358 fprintf (f, "`%s'", XSTR (x, 0));
17359 return;
17360
17361 case LABEL_REF:
17362 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17363 return;
17364
17365 case CONST:
17366 arm_print_value (f, XEXP (x, 0));
17367 return;
17368
17369 case PLUS:
17370 arm_print_value (f, XEXP (x, 0));
17371 fprintf (f, "+");
17372 arm_print_value (f, XEXP (x, 1));
17373 return;
17374
17375 case PC:
17376 fprintf (f, "pc");
17377 return;
17378
17379 default:
17380 fprintf (f, "????");
17381 return;
17382 }
17383 }
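
/* For example, (const (plus (symbol_ref "foo") (const_int 4))) is
   printed as `foo'+0x4, and an unhandled code prints "????".  */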
17384 \f
17385 /* Routines for manipulation of the constant pool. */
17386
17387 /* Arm instructions cannot load a large constant directly into a
17388 register; they have to come from a pc relative load. The constant
17389 must therefore be placed in the addressable range of the pc
17390 relative load. Depending on the precise pc relative load
17391 instruction the range is somewhere between 256 bytes and 4k. This
17392 means that we often have to dump a constant inside a function, and
17393 generate code to branch around it.
17394
17395 It is important to minimize this, since the branches will slow
17396 things down and make the code larger.
17397
17398 Normally we can hide the table after an existing unconditional
17399 branch so that there is no interruption of the flow, but in the
17400 worst case the code looks like this:
17401
17402 ldr rn, L1
17403 ...
17404 b L2
17405 align
17406 L1: .long value
17407 L2:
17408 ...
17409
17410 ldr rn, L3
17411 ...
17412 b L4
17413 align
17414 L3: .long value
17415 L4:
17416 ...
17417
17418 We fix this by performing a scan after scheduling, which notices
17419 which instructions need to have their operands fetched from the
17420 constant table and builds the table.
17421
17422 The algorithm starts by building a table of all the constants that
17423 need fixing up and all the natural barriers in the function (places
17424 where a constant table can be dropped without breaking the flow).
17425 For each fixup we note how far the pc-relative replacement will be
17426 able to reach and the offset of the instruction into the function.
17427
17428 Having built the table we then group the fixes together to form
17429 tables that are as large as possible (subject to addressing
17430 constraints) and emit each table of constants after the last
17431 barrier that is within range of all the instructions in the group.
17432 If a group does not contain a barrier, then we forcibly create one
17433 by inserting a jump instruction into the flow. Once the table has
17434 been inserted, the insns are then modified to reference the
17435 relevant entry in the pool.
17436
17437 Possible enhancements to the algorithm (not implemented) are:
17438
17439 1) For some processors and object formats, there may be benefit in
17440 aligning the pools to the start of cache lines; this alignment
17441 would need to be taken into account when calculating addressability
17442 of a pool. */
17443
17444 /* These typedefs are located at the start of this file, so that
17445 they can be used in the prototypes there. This comment is to
17446 remind readers of that fact so that the following structures
17447 can be understood more easily.
17448
17449 typedef struct minipool_node Mnode;
17450 typedef struct minipool_fixup Mfix; */
17451
17452 struct minipool_node
17453 {
17454 /* Doubly linked chain of entries. */
17455 Mnode * next;
17456 Mnode * prev;
17457 /* The maximum offset into the code at which this entry can be placed. While
17458 pushing fixes for forward references, all entries are sorted in order
17459 of increasing max_address. */
17460 HOST_WIDE_INT max_address;
17461 /* Similarly for an entry inserted for a backwards ref. */
17462 HOST_WIDE_INT min_address;
17463 /* The number of fixes referencing this entry. This can become zero
17464 if we "unpush" an entry. In this case we ignore the entry when we
17465 come to emit the code. */
17466 int refcount;
17467 /* The offset from the start of the minipool. */
17468 HOST_WIDE_INT offset;
17469 /* The value in the table. */
17470 rtx value;
17471 /* The mode of value. */
17472 machine_mode mode;
17473 /* The size of the value. With iWMMXt enabled
17474 sizes > 4 also imply an alignment of 8 bytes. */
17475 int fix_size;
17476 };
17477
17478 struct minipool_fixup
17479 {
17480 Mfix * next;
17481 rtx_insn * insn;
17482 HOST_WIDE_INT address;
17483 rtx * loc;
17484 machine_mode mode;
17485 int fix_size;
17486 rtx value;
17487 Mnode * minipool;
17488 HOST_WIDE_INT forwards;
17489 HOST_WIDE_INT backwards;
17490 };
17491
17492 /* Fixes less than a word need padding out to a word boundary. */
17493 #define MINIPOOL_FIX_SIZE(mode) \
17494 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
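
/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a word),
   while MINIPOOL_FIX_SIZE (DFmode) is 8.  */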
17495
17496 static Mnode * minipool_vector_head;
17497 static Mnode * minipool_vector_tail;
17498 static rtx_code_label *minipool_vector_label;
17499 static int minipool_pad;
17500
17501 /* The linked list of all minipool fixes required for this function. */
17502 Mfix * minipool_fix_head;
17503 Mfix * minipool_fix_tail;
17504 /* The fix entry for the current minipool, once it has been placed. */
17505 Mfix * minipool_barrier;
17506
17507 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17508 #define JUMP_TABLES_IN_TEXT_SECTION 0
17509 #endif
17510
17511 static HOST_WIDE_INT
17512 get_jump_table_size (rtx_jump_table_data *insn)
17513 {
17514 /* ADDR_VECs only take room if read-only data goes into the text
17515 section. */
17516 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17517 {
17518 rtx body = PATTERN (insn);
17519 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17520 HOST_WIDE_INT size;
17521 HOST_WIDE_INT modesize;
17522
17523 modesize = GET_MODE_SIZE (GET_MODE (body));
17524 size = modesize * XVECLEN (body, elt);
17525 switch (modesize)
17526 {
17527 case 1:
17528 /* Round up size of TBB table to a halfword boundary. */
17529 size = (size + 1) & ~HOST_WIDE_INT_1;
17530 break;
17531 case 2:
17532 /* No padding necessary for TBH. */
17533 break;
17534 case 4:
17535 /* Add two bytes for alignment on Thumb. */
17536 if (TARGET_THUMB)
17537 size += 2;
17538 break;
17539 default:
17540 gcc_unreachable ();
17541 }
17542 return size;
17543 }
17544
17545 return 0;
17546 }
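
/* For example, when jump tables live in the text section, a TBB-style
   ADDR_DIFF_VEC with QImode entries and five labels occupies 5 bytes,
   rounded up to 6; a TBH-style table with HImode entries and five labels
   occupies exactly 10 bytes.  */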
17547
17548 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17549 function descriptor) into a register and the GOT address into the
17550 FDPIC register, returning an rtx for the register holding the
17551 function address. */
17552
17553 rtx
17554 arm_load_function_descriptor (rtx funcdesc)
17555 {
17556 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17557 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17558 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17559 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17560
17561 emit_move_insn (fnaddr_reg, fnaddr);
17562
17563 /* The ABI requires the entry point address to be loaded first, but
17564 since we cannot support lazy binding for lack of atomic load of
17565 two 32-bits values, we do not need to bother to prevent the
17566 previous load from being moved after that of the GOT address. */
17567 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17568
17569 return fnaddr_reg;
17570 }
17571
17572 /* Return the maximum amount of padding that will be inserted before
17573 label LABEL. */
17574 static HOST_WIDE_INT
17575 get_label_padding (rtx label)
17576 {
17577 HOST_WIDE_INT align, min_insn_size;
17578
17579 align = 1 << label_to_alignment (label).levels[0].log;
17580 min_insn_size = TARGET_THUMB ? 2 : 4;
17581 return align > min_insn_size ? align - min_insn_size : 0;
17582 }
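
/* For example, if LABEL is aligned to 8 bytes then on Thumb (minimum
   insn size 2) up to 6 bytes of padding may be inserted before it,
   while in ARM state (minimum insn size 4) the bound is 4 bytes.  */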
17583
17584 /* Move a minipool fix MP from its current location to before MAX_MP.
17585 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17586 constraints may need updating. */
17587 static Mnode *
17588 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17589 HOST_WIDE_INT max_address)
17590 {
17591 /* The code below assumes these are different. */
17592 gcc_assert (mp != max_mp);
17593
17594 if (max_mp == NULL)
17595 {
17596 if (max_address < mp->max_address)
17597 mp->max_address = max_address;
17598 }
17599 else
17600 {
17601 if (max_address > max_mp->max_address - mp->fix_size)
17602 mp->max_address = max_mp->max_address - mp->fix_size;
17603 else
17604 mp->max_address = max_address;
17605
17606 /* Unlink MP from its current position. Since max_mp is non-null,
17607 mp->prev must be non-null. */
17608 mp->prev->next = mp->next;
17609 if (mp->next != NULL)
17610 mp->next->prev = mp->prev;
17611 else
17612 minipool_vector_tail = mp->prev;
17613
17614 /* Re-insert it before MAX_MP. */
17615 mp->next = max_mp;
17616 mp->prev = max_mp->prev;
17617 max_mp->prev = mp;
17618
17619 if (mp->prev != NULL)
17620 mp->prev->next = mp;
17621 else
17622 minipool_vector_head = mp;
17623 }
17624
17625 /* Save the new entry. */
17626 max_mp = mp;
17627
17628 /* Scan over the preceding entries and adjust their addresses as
17629 required. */
17630 while (mp->prev != NULL
17631 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17632 {
17633 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17634 mp = mp->prev;
17635 }
17636
17637 return max_mp;
17638 }
17639
17640 /* Add a constant to the minipool for a forward reference. Returns the
17641 node added or NULL if the constant will not fit in this pool. */
17642 static Mnode *
17643 add_minipool_forward_ref (Mfix *fix)
17644 {
17645 /* If set, max_mp is the first pool_entry that has a lower
17646 constraint than the one we are trying to add. */
17647 Mnode * max_mp = NULL;
17648 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17649 Mnode * mp;
17650
17651 /* If the minipool starts before the end of FIX->INSN then this FIX
17652 cannot be placed into the current pool. Furthermore, adding the
17653 new constant pool entry may cause the pool to start FIX_SIZE bytes
17654 earlier. */
17655 if (minipool_vector_head &&
17656 (fix->address + get_attr_length (fix->insn)
17657 >= minipool_vector_head->max_address - fix->fix_size))
17658 return NULL;
17659
17660 /* Scan the pool to see if a constant with the same value has
17661 already been added. While we are doing this, also note the
17662 location where we must insert the constant if it doesn't already
17663 exist. */
17664 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17665 {
17666 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17667 && fix->mode == mp->mode
17668 && (!LABEL_P (fix->value)
17669 || (CODE_LABEL_NUMBER (fix->value)
17670 == CODE_LABEL_NUMBER (mp->value)))
17671 && rtx_equal_p (fix->value, mp->value))
17672 {
17673 /* More than one fix references this entry. */
17674 mp->refcount++;
17675 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17676 }
17677
17678 /* Note the insertion point if necessary. */
17679 if (max_mp == NULL
17680 && mp->max_address > max_address)
17681 max_mp = mp;
17682
17683 /* If we are inserting an 8-byte aligned quantity and
17684 we have not already found an insertion point, then
17685 make sure that all such 8-byte aligned quantities are
17686 placed at the start of the pool. */
17687 if (ARM_DOUBLEWORD_ALIGN
17688 && max_mp == NULL
17689 && fix->fix_size >= 8
17690 && mp->fix_size < 8)
17691 {
17692 max_mp = mp;
17693 max_address = mp->max_address;
17694 }
17695 }
17696
17697 /* The value is not currently in the minipool, so we need to create
17698 a new entry for it. If MAX_MP is NULL, the entry will be put on
17699 the end of the list since the placement is less constrained than
17700 any existing entry. Otherwise, we insert the new fix before
17701 MAX_MP and, if necessary, adjust the constraints on the other
17702 entries. */
17703 mp = XNEW (Mnode);
17704 mp->fix_size = fix->fix_size;
17705 mp->mode = fix->mode;
17706 mp->value = fix->value;
17707 mp->refcount = 1;
17708 /* Not yet required for a backwards ref. */
17709 mp->min_address = -65536;
17710
17711 if (max_mp == NULL)
17712 {
17713 mp->max_address = max_address;
17714 mp->next = NULL;
17715 mp->prev = minipool_vector_tail;
17716
17717 if (mp->prev == NULL)
17718 {
17719 minipool_vector_head = mp;
17720 minipool_vector_label = gen_label_rtx ();
17721 }
17722 else
17723 mp->prev->next = mp;
17724
17725 minipool_vector_tail = mp;
17726 }
17727 else
17728 {
17729 if (max_address > max_mp->max_address - mp->fix_size)
17730 mp->max_address = max_mp->max_address - mp->fix_size;
17731 else
17732 mp->max_address = max_address;
17733
17734 mp->next = max_mp;
17735 mp->prev = max_mp->prev;
17736 max_mp->prev = mp;
17737 if (mp->prev != NULL)
17738 mp->prev->next = mp;
17739 else
17740 minipool_vector_head = mp;
17741 }
17742
17743 /* Save the new entry. */
17744 max_mp = mp;
17745
17746 /* Scan over the preceding entries and adjust their addresses as
17747 required. */
17748 while (mp->prev != NULL
17749 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17750 {
17751 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17752 mp = mp->prev;
17753 }
17754
17755 return max_mp;
17756 }
17757
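/* Like move_minipool_fix_forward_ref, but for a backwards reference: tighten
   MP's minimum-address constraint to MIN_ADDRESS and, if MIN_MP is non-NULL,
   move MP so that it sits immediately after MIN_MP, then recompute the
   offsets and minimum-address constraints of the entries.  */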
17758 static Mnode *
17759 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17760 HOST_WIDE_INT min_address)
17761 {
17762 HOST_WIDE_INT offset;
17763
17764 /* The code below assumes these are different. */
17765 gcc_assert (mp != min_mp);
17766
17767 if (min_mp == NULL)
17768 {
17769 if (min_address > mp->min_address)
17770 mp->min_address = min_address;
17771 }
17772 else
17773 {
17774 /* We will adjust this below if it is too loose. */
17775 mp->min_address = min_address;
17776
17777 /* Unlink MP from its current position. Since min_mp is non-null,
17778 mp->next must be non-null. */
17779 mp->next->prev = mp->prev;
17780 if (mp->prev != NULL)
17781 mp->prev->next = mp->next;
17782 else
17783 minipool_vector_head = mp->next;
17784
17785 /* Reinsert it after MIN_MP. */
17786 mp->prev = min_mp;
17787 mp->next = min_mp->next;
17788 min_mp->next = mp;
17789 if (mp->next != NULL)
17790 mp->next->prev = mp;
17791 else
17792 minipool_vector_tail = mp;
17793 }
17794
17795 min_mp = mp;
17796
17797 offset = 0;
17798 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17799 {
17800 mp->offset = offset;
17801 if (mp->refcount > 0)
17802 offset += mp->fix_size;
17803
17804 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17805 mp->next->min_address = mp->min_address + mp->fix_size;
17806 }
17807
17808 return min_mp;
17809 }
17810
17811 /* Add a constant to the minipool for a backward reference. Returns the
17812 node added or NULL if the constant will not fit in this pool.
17813
17814 Note that the code for inserting a backwards reference can be
17815 somewhat confusing because the calculated offsets for each fix do
17816 not take into account the size of the pool (which is still under
17817 construction). */
17818 static Mnode *
17819 add_minipool_backward_ref (Mfix *fix)
17820 {
17821 /* If set, min_mp is the last pool_entry that has a lower constraint
17822 than the one we are trying to add. */
17823 Mnode *min_mp = NULL;
17824 /* This can be negative, since it is only a constraint. */
17825 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17826 Mnode *mp;
17827
17828 /* If we can't reach the current pool from this insn, or if we can't
17829 insert this entry at the end of the pool without pushing other
17830 fixes out of range, then we don't try. This ensures that we
17831 can't fail later on. */
17832 if (min_address >= minipool_barrier->address
17833 || (minipool_vector_tail->min_address + fix->fix_size
17834 >= minipool_barrier->address))
17835 return NULL;
17836
17837 /* Scan the pool to see if a constant with the same value has
17838 already been added. While we are doing this, also note the
17839 location where we must insert the constant if it doesn't already
17840 exist. */
17841 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17842 {
17843 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17844 && fix->mode == mp->mode
17845 && (!LABEL_P (fix->value)
17846 || (CODE_LABEL_NUMBER (fix->value)
17847 == CODE_LABEL_NUMBER (mp->value)))
17848 && rtx_equal_p (fix->value, mp->value)
17849 /* Check that there is enough slack to move this entry to the
17850 end of the table (this is conservative). */
17851 && (mp->max_address
17852 > (minipool_barrier->address
17853 + minipool_vector_tail->offset
17854 + minipool_vector_tail->fix_size)))
17855 {
17856 mp->refcount++;
17857 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17858 }
17859
17860 if (min_mp != NULL)
17861 mp->min_address += fix->fix_size;
17862 else
17863 {
17864 /* Note the insertion point if necessary. */
17865 if (mp->min_address < min_address)
17866 {
17867 /* For now, we do not allow the insertion of 8-byte alignment
17868 requiring nodes anywhere but at the start of the pool. */
17869 if (ARM_DOUBLEWORD_ALIGN
17870 && fix->fix_size >= 8 && mp->fix_size < 8)
17871 return NULL;
17872 else
17873 min_mp = mp;
17874 }
17875 else if (mp->max_address
17876 < minipool_barrier->address + mp->offset + fix->fix_size)
17877 {
17878 /* Inserting before this entry would push the fix beyond
17879 its maximum address (which can happen if we have
17880 re-located a forwards fix); force the new fix to come
17881 after it. */
17882 if (ARM_DOUBLEWORD_ALIGN
17883 && fix->fix_size >= 8 && mp->fix_size < 8)
17884 return NULL;
17885 else
17886 {
17887 min_mp = mp;
17888 min_address = mp->min_address + fix->fix_size;
17889 }
17890 }
17891 /* Do not insert a non-8-byte aligned quantity before 8-byte
17892 aligned quantities. */
17893 else if (ARM_DOUBLEWORD_ALIGN
17894 && fix->fix_size < 8
17895 && mp->fix_size >= 8)
17896 {
17897 min_mp = mp;
17898 min_address = mp->min_address + fix->fix_size;
17899 }
17900 }
17901 }
17902
17903 /* We need to create a new entry. */
17904 mp = XNEW (Mnode);
17905 mp->fix_size = fix->fix_size;
17906 mp->mode = fix->mode;
17907 mp->value = fix->value;
17908 mp->refcount = 1;
17909 mp->max_address = minipool_barrier->address + 65536;
17910
17911 mp->min_address = min_address;
17912
17913 if (min_mp == NULL)
17914 {
17915 mp->prev = NULL;
17916 mp->next = minipool_vector_head;
17917
17918 if (mp->next == NULL)
17919 {
17920 minipool_vector_tail = mp;
17921 minipool_vector_label = gen_label_rtx ();
17922 }
17923 else
17924 mp->next->prev = mp;
17925
17926 minipool_vector_head = mp;
17927 }
17928 else
17929 {
17930 mp->next = min_mp->next;
17931 mp->prev = min_mp;
17932 min_mp->next = mp;
17933
17934 if (mp->next != NULL)
17935 mp->next->prev = mp;
17936 else
17937 minipool_vector_tail = mp;
17938 }
17939
17940 /* Save the new entry. */
17941 min_mp = mp;
17942
17943 if (mp->prev)
17944 mp = mp->prev;
17945 else
17946 mp->offset = 0;
17947
17948 /* Scan over the following entries and adjust their offsets. */
17949 while (mp->next != NULL)
17950 {
17951 if (mp->next->min_address < mp->min_address + mp->fix_size)
17952 mp->next->min_address = mp->min_address + mp->fix_size;
17953
17954 if (mp->refcount)
17955 mp->next->offset = mp->offset + mp->fix_size;
17956 else
17957 mp->next->offset = mp->offset;
17958
17959 mp = mp->next;
17960 }
17961
17962 return min_mp;
17963 }
17964
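/* Record BARRIER as the barrier for the current minipool and assign an
   offset within the pool to each entry that is still referenced.  */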
17965 static void
17966 assign_minipool_offsets (Mfix *barrier)
17967 {
17968 HOST_WIDE_INT offset = 0;
17969 Mnode *mp;
17970
17971 minipool_barrier = barrier;
17972
17973 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17974 {
17975 mp->offset = offset;
17976
17977 if (mp->refcount > 0)
17978 offset += mp->fix_size;
17979 }
17980 }
17981
17982 /* Output the literal table */
17983 static void
17984 dump_minipool (rtx_insn *scan)
17985 {
17986 Mnode * mp;
17987 Mnode * nmp;
17988 int align64 = 0;
17989
17990 if (ARM_DOUBLEWORD_ALIGN)
17991 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17992 if (mp->refcount > 0 && mp->fix_size >= 8)
17993 {
17994 align64 = 1;
17995 break;
17996 }
17997
17998 if (dump_file)
17999 fprintf (dump_file,
18000 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18001 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18002
18003 scan = emit_label_after (gen_label_rtx (), scan);
18004 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18005 scan = emit_label_after (minipool_vector_label, scan);
18006
18007 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18008 {
18009 if (mp->refcount > 0)
18010 {
18011 if (dump_file)
18012 {
18013 fprintf (dump_file,
18014 ";; Offset %u, min %ld, max %ld ",
18015 (unsigned) mp->offset, (unsigned long) mp->min_address,
18016 (unsigned long) mp->max_address);
18017 arm_print_value (dump_file, mp->value);
18018 fputc ('\n', dump_file);
18019 }
18020
18021 rtx val = copy_rtx (mp->value);
18022
18023 switch (GET_MODE_SIZE (mp->mode))
18024 {
18025 #ifdef HAVE_consttable_1
18026 case 1:
18027 scan = emit_insn_after (gen_consttable_1 (val), scan);
18028 break;
18029
18030 #endif
18031 #ifdef HAVE_consttable_2
18032 case 2:
18033 scan = emit_insn_after (gen_consttable_2 (val), scan);
18034 break;
18035
18036 #endif
18037 #ifdef HAVE_consttable_4
18038 case 4:
18039 scan = emit_insn_after (gen_consttable_4 (val), scan);
18040 break;
18041
18042 #endif
18043 #ifdef HAVE_consttable_8
18044 case 8:
18045 scan = emit_insn_after (gen_consttable_8 (val), scan);
18046 break;
18047
18048 #endif
18049 #ifdef HAVE_consttable_16
18050 case 16:
18051 scan = emit_insn_after (gen_consttable_16 (val), scan);
18052 break;
18053
18054 #endif
18055 default:
18056 gcc_unreachable ();
18057 }
18058 }
18059
18060 nmp = mp->next;
18061 free (mp);
18062 }
18063
18064 minipool_vector_head = minipool_vector_tail = NULL;
18065 scan = emit_insn_after (gen_consttable_end (), scan);
18066 scan = emit_barrier_after (scan);
18067 }
18068
18069 /* Return the cost of forcibly inserting a barrier after INSN. */
18070 static int
18071 arm_barrier_cost (rtx_insn *insn)
18072 {
18073 /* Basing the location of the pool on the loop depth is preferable,
18074 but at the moment the basic block information seems to be
18075 corrupted by this stage of the compilation. */
18076 int base_cost = 50;
18077 rtx_insn *next = next_nonnote_insn (insn);
18078
18079 if (next != NULL && LABEL_P (next))
18080 base_cost -= 20;
18081
18082 switch (GET_CODE (insn))
18083 {
18084 case CODE_LABEL:
18085 /* It will always be better to place the table before the label, rather
18086 than after it. */
18087 return 50;
18088
18089 case INSN:
18090 case CALL_INSN:
18091 return base_cost;
18092
18093 case JUMP_INSN:
18094 return base_cost - 10;
18095
18096 default:
18097 return base_cost + 10;
18098 }
18099 }
18100
18101 /* Find the best place in the insn stream in the range
18102 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18103 Create the barrier by inserting a jump and add a new fix entry for
18104 it. */
18105 static Mfix *
18106 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18107 {
18108 HOST_WIDE_INT count = 0;
18109 rtx_barrier *barrier;
18110 rtx_insn *from = fix->insn;
18111 /* The instruction after which we will insert the jump. */
18112 rtx_insn *selected = NULL;
18113 int selected_cost;
18114 /* The address at which the jump instruction will be placed. */
18115 HOST_WIDE_INT selected_address;
18116 Mfix * new_fix;
18117 HOST_WIDE_INT max_count = max_address - fix->address;
18118 rtx_code_label *label = gen_label_rtx ();
18119
18120 selected_cost = arm_barrier_cost (from);
18121 selected_address = fix->address;
18122
18123 while (from && count < max_count)
18124 {
18125 rtx_jump_table_data *tmp;
18126 int new_cost;
18127
18128 /* This code shouldn't have been called if there was a natural barrier
18129 within range. */
18130 gcc_assert (!BARRIER_P (from));
18131
18132 /* Count the length of this insn. This must stay in sync with the
18133 code that pushes minipool fixes. */
18134 if (LABEL_P (from))
18135 count += get_label_padding (from);
18136 else
18137 count += get_attr_length (from);
18138
18139 /* If there is a jump table, add its length. */
18140 if (tablejump_p (from, NULL, &tmp))
18141 {
18142 count += get_jump_table_size (tmp);
18143
18144 /* Jump tables aren't in a basic block, so base the cost on
18145 the dispatch insn. If we select this location, we will
18146 still put the pool after the table. */
18147 new_cost = arm_barrier_cost (from);
18148
18149 if (count < max_count
18150 && (!selected || new_cost <= selected_cost))
18151 {
18152 selected = tmp;
18153 selected_cost = new_cost;
18154 selected_address = fix->address + count;
18155 }
18156
18157 /* Continue after the dispatch table. */
18158 from = NEXT_INSN (tmp);
18159 continue;
18160 }
18161
18162 new_cost = arm_barrier_cost (from);
18163
18164 if (count < max_count
18165 && (!selected || new_cost <= selected_cost))
18166 {
18167 selected = from;
18168 selected_cost = new_cost;
18169 selected_address = fix->address + count;
18170 }
18171
18172 from = NEXT_INSN (from);
18173 }
18174
18175 /* Make sure that we found a place to insert the jump. */
18176 gcc_assert (selected);
18177
18178 /* Create a new JUMP_INSN that branches around a barrier. */
18179 from = emit_jump_insn_after (gen_jump (label), selected);
18180 JUMP_LABEL (from) = label;
18181 barrier = emit_barrier_after (from);
18182 emit_label_after (label, barrier);
18183
18184 /* Create a minipool barrier entry for the new barrier. */
18185 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18186 new_fix->insn = barrier;
18187 new_fix->address = selected_address;
18188 new_fix->next = fix->next;
18189 fix->next = new_fix;
18190
18191 return new_fix;
18192 }
18193
18194 /* Record that there is a natural barrier in the insn stream at
18195 ADDRESS. */
18196 static void
18197 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18198 {
18199 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18200
18201 fix->insn = insn;
18202 fix->address = address;
18203
18204 fix->next = NULL;
18205 if (minipool_fix_head != NULL)
18206 minipool_fix_tail->next = fix;
18207 else
18208 minipool_fix_head = fix;
18209
18210 minipool_fix_tail = fix;
18211 }
18212
18213 /* Record INSN, which will need fixing up to load a value from the
18214 minipool. ADDRESS is the offset of the insn since the start of the
18215 function; LOC is a pointer to the part of the insn which requires
18216 fixing; VALUE is the constant that must be loaded, which is of type
18217 MODE. */
18218 static void
18219 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18220 machine_mode mode, rtx value)
18221 {
18222 gcc_assert (!arm_disable_literal_pool);
18223 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18224
18225 fix->insn = insn;
18226 fix->address = address;
18227 fix->loc = loc;
18228 fix->mode = mode;
18229 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18230 fix->value = value;
18231 fix->forwards = get_attr_pool_range (insn);
18232 fix->backwards = get_attr_neg_pool_range (insn);
18233 fix->minipool = NULL;
18234
18235 /* If an insn doesn't have a range defined for it, then it isn't
18236 expecting to be reworked by this code. Better to stop now than
18237 to generate duff assembly code. */
18238 gcc_assert (fix->forwards || fix->backwards);
18239
18240 /* If an entry requires 8-byte alignment then assume all constant pools
18241 require 4 bytes of padding. Trying to do this later on a per-pool
18242 basis is awkward because existing pool entries have to be modified. */
18243 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18244 minipool_pad = 4;
18245
18246 if (dump_file)
18247 {
18248 fprintf (dump_file,
18249 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18250 GET_MODE_NAME (mode),
18251 INSN_UID (insn), (unsigned long) address,
18252 -1 * (long)fix->backwards, (long)fix->forwards);
18253 arm_print_value (dump_file, fix->value);
18254 fprintf (dump_file, "\n");
18255 }
18256
18257 /* Add it to the chain of fixes. */
18258 fix->next = NULL;
18259
18260 if (minipool_fix_head != NULL)
18261 minipool_fix_tail->next = fix;
18262 else
18263 minipool_fix_head = fix;
18264
18265 minipool_fix_tail = fix;
18266 }
18267
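/* In other words, each fix constrains the pool to lie roughly in the window
   [ADDRESS - BACKWARDS, ADDRESS + FORWARDS] around the instruction (the
   exact bounds also account for the insn length and minipool_pad); the two
   range values come from the pool_range and neg_pool_range attributes of
   the insn.  */
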
18268 /* Return the maximum cost (in insns) that we allow for synthesizing a
18269 64-bit constant inline; a return value of 99 would mean that we always
18270 want to synthesize the value. */
18271 int
18272 arm_max_const_double_inline_cost ()
18273 {
18274 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18275 }
18276
18277 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18278 Returns the number of insns needed, or 99 if we don't know how to
18279 do it. */
18280 int
18281 arm_const_double_inline_cost (rtx val)
18282 {
18283 rtx lowpart, highpart;
18284 machine_mode mode;
18285
18286 mode = GET_MODE (val);
18287
18288 if (mode == VOIDmode)
18289 mode = DImode;
18290
18291 gcc_assert (GET_MODE_SIZE (mode) == 8);
18292
18293 lowpart = gen_lowpart (SImode, val);
18294 highpart = gen_highpart_mode (SImode, mode, val);
18295
18296 gcc_assert (CONST_INT_P (lowpart));
18297 gcc_assert (CONST_INT_P (highpart));
18298
18299 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18300 NULL_RTX, NULL_RTX, 0, 0)
18301 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18302 NULL_RTX, NULL_RTX, 0, 0));
18303 }
18304
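/* A worked example under the usual ARM immediate rules: for the 64-bit
   constant 0x000000000000ff00 the low part 0x0000ff00 and the high part 0
   each take a single instruction, so the function returns 2, which is within
   arm_max_const_double_inline_cost (), and such a constant would normally be
   synthesized inline rather than placed in a literal pool.  */
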
18305 /* Cost of loading a SImode constant. */
18306 static inline int
18307 arm_const_inline_cost (enum rtx_code code, rtx val)
18308 {
18309 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18310 NULL_RTX, NULL_RTX, 1, 0);
18311 }
18312
18313 /* Return true if it is worthwhile to split a 64-bit constant into two
18314 32-bit operations. This is the case if optimizing for size, or
18315 if we have load delay slots, or if one 32-bit part can be done with
18316 a single data operation. */
18317 bool
18318 arm_const_double_by_parts (rtx val)
18319 {
18320 machine_mode mode = GET_MODE (val);
18321 rtx part;
18322
18323 if (optimize_size || arm_ld_sched)
18324 return true;
18325
18326 if (mode == VOIDmode)
18327 mode = DImode;
18328
18329 part = gen_highpart_mode (SImode, mode, val);
18330
18331 gcc_assert (CONST_INT_P (part));
18332
18333 if (const_ok_for_arm (INTVAL (part))
18334 || const_ok_for_arm (~INTVAL (part)))
18335 return true;
18336
18337 part = gen_lowpart (SImode, val);
18338
18339 gcc_assert (CONST_INT_P (part));
18340
18341 if (const_ok_for_arm (INTVAL (part))
18342 || const_ok_for_arm (~INTVAL (part)))
18343 return true;
18344
18345 return false;
18346 }
18347
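/* For instance, leaving the early optimize_size/arm_ld_sched return aside
   and relying on the usual rule that an ARM immediate is an 8-bit value
   rotated right by an even amount: for 0x000003fc00000000 the high part
   0x000003fc is 0xff shifted left by two and hence encodable, so the
   function returns true; for 0x0101010101010101 neither 32-bit part nor its
   complement is encodable and the function returns false.  */
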
18348 /* Return true if it is possible to inline both the high and low parts
18349 of a 64-bit constant into 32-bit data processing instructions. */
18350 bool
18351 arm_const_double_by_immediates (rtx val)
18352 {
18353 machine_mode mode = GET_MODE (val);
18354 rtx part;
18355
18356 if (mode == VOIDmode)
18357 mode = DImode;
18358
18359 part = gen_highpart_mode (SImode, mode, val);
18360
18361 gcc_assert (CONST_INT_P (part));
18362
18363 if (!const_ok_for_arm (INTVAL (part)))
18364 return false;
18365
18366 part = gen_lowpart (SImode, val);
18367
18368 gcc_assert (CONST_INT_P (part));
18369
18370 if (!const_ok_for_arm (INTVAL (part)))
18371 return false;
18372
18373 return true;
18374 }
18375
18376 /* Scan INSN and note any of its operands that need fixing.
18377 If DO_PUSHES is false we do not actually push any of the fixups
18378 needed. */
18379 static void
18380 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18381 {
18382 int opno;
18383
18384 extract_constrain_insn (insn);
18385
18386 if (recog_data.n_alternatives == 0)
18387 return;
18388
18389 /* Fill in recog_op_alt with information about the constraints of
18390 this insn. */
18391 preprocess_constraints (insn);
18392
18393 const operand_alternative *op_alt = which_op_alt ();
18394 for (opno = 0; opno < recog_data.n_operands; opno++)
18395 {
18396 /* Things we need to fix can only occur in inputs. */
18397 if (recog_data.operand_type[opno] != OP_IN)
18398 continue;
18399
18400 /* If this alternative is a memory reference, then any mention
18401 of constants in this alternative is really to fool reload
18402 into allowing us to accept one there. We need to fix them up
18403 now so that we output the right code. */
18404 if (op_alt[opno].memory_ok)
18405 {
18406 rtx op = recog_data.operand[opno];
18407
18408 if (CONSTANT_P (op))
18409 {
18410 if (do_pushes)
18411 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18412 recog_data.operand_mode[opno], op);
18413 }
18414 else if (MEM_P (op)
18415 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18416 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18417 {
18418 if (do_pushes)
18419 {
18420 rtx cop = avoid_constant_pool_reference (op);
18421
18422 /* Casting the address of something to a mode narrower
18423 than a word can cause avoid_constant_pool_reference()
18424 to return the pool reference itself. That's no good to
18425 us here. Let's just hope that we can use the
18426 constant pool value directly. */
18427 if (op == cop)
18428 cop = get_pool_constant (XEXP (op, 0));
18429
18430 push_minipool_fix (insn, address,
18431 recog_data.operand_loc[opno],
18432 recog_data.operand_mode[opno], cop);
18433 }
18434
18435 }
18436 }
18437 }
18438
18439 return;
18440 }
18441
18442 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18443 and unions in the context of ARMv8-M Security Extensions. It is used as a
18444 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18445 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18446 or four masks, depending on whether it is being computed for a
18447 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18448 respectively. ARG_TYPE is the tree for the type of the argument or of a
18449 field within an argument. REGNO points to the register this argument or
18450 field starts in and is updated accordingly. STARTING_BIT is the bit this
18451 argument or field starts at, and LAST_USED_BIT tracks the last used bit
18452 and is likewise updated. */
18453
18454 static unsigned HOST_WIDE_INT
18455 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18456 uint32_t * padding_bits_to_clear,
18457 unsigned starting_bit, int * last_used_bit)
18458
18459 {
18460 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18461
18462 if (TREE_CODE (arg_type) == RECORD_TYPE)
18463 {
18464 unsigned current_bit = starting_bit;
18465 tree field;
18466 long int offset, size;
18467
18468
18469 field = TYPE_FIELDS (arg_type);
18470 while (field)
18471 {
18472 /* The offset within a structure is always an offset from
18473 the start of that structure. Make sure we take that into the
18474 calculation of the register based offset that we use here. */
18475 offset = starting_bit;
18476 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18477 offset %= 32;
18478
18479 /* This is the actual size of the field, for bitfields this is the
18480 bitfield width and not the container size. */
18481 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18482
18483 if (*last_used_bit != offset)
18484 {
18485 if (offset < *last_used_bit)
18486 {
18487 /* This field's offset is before the 'last_used_bit', that
18488 means this field goes on the next register. So we need to
18489 pad the rest of the current register and increase the
18490 register number. */
18491 uint32_t mask;
18492 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18493 mask++;
18494
18495 padding_bits_to_clear[*regno] |= mask;
18496 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18497 (*regno)++;
18498 }
18499 else
18500 {
18501 /* Otherwise we pad the bits between the last field's end and
18502 the start of the new field. */
18503 uint32_t mask;
18504
18505 mask = ((uint32_t)-1) >> (32 - offset);
18506 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18507 padding_bits_to_clear[*regno] |= mask;
18508 }
18509 current_bit = offset;
18510 }
18511
18512 /* Calculate further padding bits for inner structs/unions too. */
18513 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18514 {
18515 *last_used_bit = current_bit;
18516 not_to_clear_reg_mask
18517 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18518 padding_bits_to_clear, offset,
18519 last_used_bit);
18520 }
18521 else
18522 {
18523 /* Update 'current_bit' with this field's size. If the
18524 'current_bit' lies in a subsequent register, update 'regno' and
18525 reset 'current_bit' to point to the current bit in that new
18526 register. */
18527 current_bit += size;
18528 while (current_bit >= 32)
18529 {
18530 current_bit-=32;
18531 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18532 (*regno)++;
18533 }
18534 *last_used_bit = current_bit;
18535 }
18536
18537 field = TREE_CHAIN (field);
18538 }
18539 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18540 }
18541 else if (TREE_CODE (arg_type) == UNION_TYPE)
18542 {
18543 tree field, field_t;
18544 int i, regno_t, field_size;
18545 int max_reg = -1;
18546 int max_bit = -1;
18547 uint32_t mask;
18548 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18549 = {-1, -1, -1, -1};
18550
18551 /* To compute the padding bits in a union we only consider a bit as
18552 padding if, for every field in the union, it is either a padding bit in
18553 that field or falls outside that field's size. */
18554 field = TYPE_FIELDS (arg_type);
18555 while (field)
18556 {
18557 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18558 = {0U, 0U, 0U, 0U};
18559 int last_used_bit_t = *last_used_bit;
18560 regno_t = *regno;
18561 field_t = TREE_TYPE (field);
18562
18563 /* If the field's type is either a record or a union make sure to
18564 compute their padding bits too. */
18565 if (RECORD_OR_UNION_TYPE_P (field_t))
18566 not_to_clear_reg_mask
18567 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18568 &padding_bits_to_clear_t[0],
18569 starting_bit, &last_used_bit_t);
18570 else
18571 {
18572 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18573 regno_t = (field_size / 32) + *regno;
18574 last_used_bit_t = (starting_bit + field_size) % 32;
18575 }
18576
18577 for (i = *regno; i < regno_t; i++)
18578 {
18579 /* For all but the last register used by this field only keep the
18580 padding bits that were padding bits in this field. */
18581 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18582 }
18583
18584 /* For the last register, keep all padding bits that were padding
18585 bits in this field and any padding bits that are still valid
18586 as padding bits but fall outside of this field's size. */
18587 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18588 padding_bits_to_clear_res[regno_t]
18589 &= padding_bits_to_clear_t[regno_t] | mask;
18590
18591 /* Update the maximum size of the fields in terms of registers used
18592 ('max_reg') and the 'last_used_bit' in said register. */
18593 if (max_reg < regno_t)
18594 {
18595 max_reg = regno_t;
18596 max_bit = last_used_bit_t;
18597 }
18598 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18599 max_bit = last_used_bit_t;
18600
18601 field = TREE_CHAIN (field);
18602 }
18603
18604 /* Update the current padding_bits_to_clear using the intersection of the
18605 padding bits of all the fields. */
18606 for (i=*regno; i < max_reg; i++)
18607 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18608
18609 /* Do not keep trailing padding bits; we do not know yet whether this
18610 is the end of the argument. */
18611 mask = ((uint32_t) 1 << max_bit) - 1;
18612 padding_bits_to_clear[max_reg]
18613 |= padding_bits_to_clear_res[max_reg] & mask;
18614
18615 *regno = max_reg;
18616 *last_used_bit = max_bit;
18617 }
18618 else
18619 /* This function should only be used for structs and unions. */
18620 gcc_unreachable ();
18621
18622 return not_to_clear_reg_mask;
18623 }
18624
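/* A worked example, ignoring bit-fields and over-aligned members: for

       struct s { unsigned char a; unsigned short b; };

   passed or returned in r0, field A occupies bits 0-7, bits 8-15 are
   alignment padding and field B occupies bits 16-31.  The function records
   0x0000ff00 in padding_bits_to_clear[0] and leaves *LAST_USED_BIT at 0
   because the struct ends exactly on a register boundary, so the caller
   below then drops the bit for the unused next register and only r0 remains
   marked as holding part of the value.  */
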
18625 /* In the context of ARMv8-M Security Extensions, this function is used for both
18626 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18627 registers are used when returning or passing arguments, which is then
18628 returned as a mask. It will also compute a mask to indicate padding/unused
18629 bits for each of these registers, passing it back through the
18630 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18631 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18632 the starting register used to pass this argument or return value is passed
18633 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18634 for struct and union types. */
18635
18636 static unsigned HOST_WIDE_INT
18637 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18638 uint32_t * padding_bits_to_clear)
18639
18640 {
18641 int last_used_bit = 0;
18642 unsigned HOST_WIDE_INT not_to_clear_mask;
18643
18644 if (RECORD_OR_UNION_TYPE_P (arg_type))
18645 {
18646 not_to_clear_mask
18647 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18648 padding_bits_to_clear, 0,
18649 &last_used_bit);
18650
18651
18652 /* If the 'last_used_bit' is not zero, that means we are still using a
18653 part of the last 'regno'. In such cases we must clear the trailing
18654 bits. Otherwise we are not using regno and we should mark it as to
18655 clear. */
18656 if (last_used_bit != 0)
18657 padding_bits_to_clear[regno]
18658 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18659 else
18660 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18661 }
18662 else
18663 {
18664 not_to_clear_mask = 0;
18665 /* We are not dealing with structs nor unions. So these arguments may be
18666 passed in floating point registers too. In some cases a BLKmode is
18667 used when returning or passing arguments in multiple VFP registers. */
18668 if (GET_MODE (arg_rtx) == BLKmode)
18669 {
18670 int i, arg_regs;
18671 rtx reg;
18672
18673 /* This should really only occur when dealing with the hard-float
18674 ABI. */
18675 gcc_assert (TARGET_HARD_FLOAT_ABI);
18676
18677 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18678 {
18679 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18680 gcc_assert (REG_P (reg));
18681
18682 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18683
18684 /* If we are dealing with DF mode, make sure we don't
18685 clear either of the registers it addresses. */
18686 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18687 if (arg_regs > 1)
18688 {
18689 unsigned HOST_WIDE_INT mask;
18690 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18691 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18692 not_to_clear_mask |= mask;
18693 }
18694 }
18695 }
18696 else
18697 {
18698 /* Otherwise we can rely on the MODE to determine how many registers
18699 are being used by this argument. */
18700 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18701 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18702 if (arg_regs > 1)
18703 {
18704 unsigned HOST_WIDE_INT
18705 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18706 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18707 not_to_clear_mask |= mask;
18708 }
18709 }
18710 }
18711
18712 return not_to_clear_mask;
18713 }
18714
18715 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call
18716 or returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18717 which registers are to be fully cleared, using the value in register
18718 CLEARING_REG if more efficient. The PADDING_BITS_TO_CLEAR array of
18719 PADDING_BITS_LEN entries gives the bits that need to be cleared in
18720 caller-saved core registers, with SCRATCH_REG used as a scratch register.
18721
18722 NOTE: one of the following three conditions must hold:
18723 - SCRATCH_REG is a low register
18724 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18725 in TO_CLEAR_BITMAP)
18726 - CLEARING_REG is a low register. */
18727
18728 static void
18729 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18730 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18731 {
18732 bool saved_clearing = false;
18733 rtx saved_clearing_reg = NULL_RTX;
18734 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18735
18736 gcc_assert (arm_arch_cmse);
18737
18738 if (!bitmap_empty_p (to_clear_bitmap))
18739 {
18740 minregno = bitmap_first_set_bit (to_clear_bitmap);
18741 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18742 }
18743 clearing_regno = REGNO (clearing_reg);
18744
18745 /* Clear padding bits. */
18746 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18747 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18748 {
18749 uint64_t mask;
18750 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18751
18752 if (padding_bits_to_clear[i] == 0)
18753 continue;
18754
18755 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18756 CLEARING_REG as scratch. */
18757 if (TARGET_THUMB1
18758 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18759 {
18760 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18761 such that we can use clearing_reg to clear the unused bits in the
18762 arguments. */
18763 if ((clearing_regno > maxregno
18764 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18765 && !saved_clearing)
18766 {
18767 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18768 emit_move_insn (scratch_reg, clearing_reg);
18769 saved_clearing = true;
18770 saved_clearing_reg = scratch_reg;
18771 }
18772 scratch_reg = clearing_reg;
18773 }
18774
18775 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18776 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18777 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18778
18779 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18780 mask = (~padding_bits_to_clear[i]) >> 16;
18781 rtx16 = gen_int_mode (16, SImode);
18782 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18783 if (mask)
18784 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18785
18786 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18787 }
18788 if (saved_clearing)
18789 emit_move_insn (clearing_reg, saved_clearing_reg);
18790
18791
18792 /* Clear full registers. */
18793
18794 if (TARGET_HAVE_FPCXT_CMSE)
18795 {
18796 rtvec vunspec_vec;
18797 int i, j, k, nb_regs;
18798 rtx use_seq, par, reg, set, vunspec;
18799 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18800 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18801 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18802
18803 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18804 {
18805 /* Find next register to clear and exit if none. */
18806 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18807 if (i > maxregno)
18808 break;
18809
18810 /* Compute number of consecutive registers to clear. */
18811 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18812 j++);
18813 nb_regs = j - i;
18814
18815 /* Create VSCCLRM RTX pattern. */
18816 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18817 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18818 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18819 VUNSPEC_VSCCLRM_VPR);
18820 XVECEXP (par, 0, 0) = vunspec;
18821
18822 /* Insert VFP register clearing RTX in the pattern. */
18823 start_sequence ();
18824 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18825 {
18826 if (!bitmap_bit_p (to_clear_bitmap, j))
18827 continue;
18828
18829 reg = gen_rtx_REG (SFmode, j);
18830 set = gen_rtx_SET (reg, const0_rtx);
18831 XVECEXP (par, 0, k++) = set;
18832 emit_use (reg);
18833 }
18834 use_seq = get_insns ();
18835 end_sequence ();
18836
18837 emit_insn_after (use_seq, emit_insn (par));
18838 }
18839
18840 /* Get set of core registers to clear. */
18841 bitmap_clear (core_regs_bitmap);
18842 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18843 IP_REGNUM - R0_REGNUM + 1);
18844 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18845 core_regs_bitmap);
18846 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18847
18848 if (bitmap_empty_p (to_clear_core_bitmap))
18849 return;
18850
18851 /* Create clrm RTX pattern. */
18852 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18853 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18854
18855 /* Insert core register clearing RTX in the pattern. */
18856 start_sequence ();
18857 for (j = 0, i = minregno; j < nb_regs; i++)
18858 {
18859 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18860 continue;
18861
18862 reg = gen_rtx_REG (SImode, i);
18863 set = gen_rtx_SET (reg, const0_rtx);
18864 XVECEXP (par, 0, j++) = set;
18865 emit_use (reg);
18866 }
18867
18868 /* Insert APSR register clearing RTX in the pattern
18869 along with clobbering CC. */
18870 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18871 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18872 VUNSPEC_CLRM_APSR);
18873
18874 XVECEXP (par, 0, j++) = vunspec;
18875
18876 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18877 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18878 XVECEXP (par, 0, j) = clobber;
18879
18880 use_seq = get_insns ();
18881 end_sequence ();
18882
18883 emit_insn_after (use_seq, emit_insn (par));
18884 }
18885 else
18886 {
18887 /* If not marked for clearing, clearing_reg already does not contain
18888 any secret. */
18889 if (clearing_regno <= maxregno
18890 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18891 {
18892 emit_move_insn (clearing_reg, const0_rtx);
18893 emit_use (clearing_reg);
18894 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18895 }
18896
18897 for (regno = minregno; regno <= maxregno; regno++)
18898 {
18899 if (!bitmap_bit_p (to_clear_bitmap, regno))
18900 continue;
18901
18902 if (IS_VFP_REGNUM (regno))
18903 {
18904 /* If regno is an even vfp register and its successor is also to
18905 be cleared, use vmov. */
18906 if (TARGET_VFP_DOUBLE
18907 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18908 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18909 {
18910 emit_move_insn (gen_rtx_REG (DFmode, regno),
18911 CONST1_RTX (DFmode));
18912 emit_use (gen_rtx_REG (DFmode, regno));
18913 regno++;
18914 }
18915 else
18916 {
18917 emit_move_insn (gen_rtx_REG (SFmode, regno),
18918 CONST1_RTX (SFmode));
18919 emit_use (gen_rtx_REG (SFmode, regno));
18920 }
18921 }
18922 else
18923 {
18924 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18925 emit_use (gen_rtx_REG (SImode, regno));
18926 }
18927 }
18928 }
18929 }
18930
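/* For one argument register rN, the padding-clearing loop above emits
   roughly the following (an illustrative sketch only; the exact mnemonics
   depend on the target and on register allocation):

       movw  scratch, #lo16 (~padding_bits_to_clear[i])
       movt  scratch, #hi16 (~padding_bits_to_clear[i])   @ only if non-zero
       and   rN, rN, scratch

   i.e. every bit known to be padding in rN is forced to zero while the
   argument bits themselves are left untouched.  */
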
18931 /* Clear core and caller-saved VFP registers not used to pass arguments before
18932 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18933 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18934 libgcc/config/arm/cmse_nonsecure_call.S. */
18935
18936 static void
18937 cmse_nonsecure_call_inline_register_clear (void)
18938 {
18939 basic_block bb;
18940
18941 FOR_EACH_BB_FN (bb, cfun)
18942 {
18943 rtx_insn *insn;
18944
18945 FOR_BB_INSNS (bb, insn)
18946 {
18947 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18948 /* frame = VFP regs + FPSCR + VPR. */
18949 unsigned lazy_store_stack_frame_size
18950 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18951 unsigned long callee_saved_mask
18952 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18953 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18954 unsigned address_regnum, regno;
18955 unsigned max_int_regno
18956 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18957 unsigned max_fp_regno
18958 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18959 unsigned maxregno
18960 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18961 auto_sbitmap to_clear_bitmap (maxregno + 1);
18962 rtx_insn *seq;
18963 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18964 rtx address;
18965 CUMULATIVE_ARGS args_so_far_v;
18966 cumulative_args_t args_so_far;
18967 tree arg_type, fntype;
18968 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18969 function_args_iterator args_iter;
18970 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18971
18972 if (!NONDEBUG_INSN_P (insn))
18973 continue;
18974
18975 if (!CALL_P (insn))
18976 continue;
18977
18978 pat = PATTERN (insn);
18979 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18980 call = XVECEXP (pat, 0, 0);
18981
18982 /* Get the real call RTX if the insn sets a value, ie. returns. */
18983 if (GET_CODE (call) == SET)
18984 call = SET_SRC (call);
18985
18986 /* Check if it is a cmse_nonsecure_call. */
18987 unspec = XEXP (call, 0);
18988 if (GET_CODE (unspec) != UNSPEC
18989 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18990 continue;
18991
18992 /* Mark registers that need to be cleared. Those that hold a
18993 parameter are removed from the set further below. */
18994 bitmap_clear (to_clear_bitmap);
18995 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18996 max_int_regno - R0_REGNUM + 1);
18997
18998 /* Only look at the caller-saved floating point registers in case of
18999 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19000 lazy store and loads which clear both caller- and callee-saved
19001 registers. */
19002 if (!lazy_fpclear)
19003 {
19004 auto_sbitmap float_bitmap (maxregno + 1);
19005
19006 bitmap_clear (float_bitmap);
19007 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19008 max_fp_regno - FIRST_VFP_REGNUM + 1);
19009 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19010 }
19011
19012 /* Make sure the register used to hold the function address is not
19013 cleared. */
19014 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19015 gcc_assert (MEM_P (address));
19016 gcc_assert (REG_P (XEXP (address, 0)));
19017 address_regnum = REGNO (XEXP (address, 0));
19018 if (address_regnum <= max_int_regno)
19019 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19020
19021 /* Set basic block of call insn so that df rescan is performed on
19022 insns inserted here. */
19023 set_block_for_insn (insn, bb);
19024 df_set_flags (DF_DEFER_INSN_RESCAN);
19025 start_sequence ();
19026
19027 /* Make sure the scheduler doesn't schedule other insns beyond
19028 here. */
19029 emit_insn (gen_blockage ());
19030
19031 /* Walk through all arguments and clear registers
19032 appropriately. */
19033 fntype = TREE_TYPE (MEM_EXPR (address));
19034 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19035 NULL_TREE);
19036 args_so_far = pack_cumulative_args (&args_so_far_v);
19037 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19038 {
19039 rtx arg_rtx;
19040 uint64_t to_clear_args_mask;
19041
19042 if (VOID_TYPE_P (arg_type))
19043 continue;
19044
19045 function_arg_info arg (arg_type, /*named=*/true);
19046 if (!first_param)
19047 /* ??? We should advance after processing the argument and pass
19048 the argument we're advancing past. */
19049 arm_function_arg_advance (args_so_far, arg);
19050
19051 arg_rtx = arm_function_arg (args_so_far, arg);
19052 gcc_assert (REG_P (arg_rtx));
19053 to_clear_args_mask
19054 = compute_not_to_clear_mask (arg_type, arg_rtx,
19055 REGNO (arg_rtx),
19056 &padding_bits_to_clear[0]);
19057 if (to_clear_args_mask)
19058 {
19059 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19060 {
19061 if (to_clear_args_mask & (1ULL << regno))
19062 bitmap_clear_bit (to_clear_bitmap, regno);
19063 }
19064 }
19065
19066 first_param = false;
19067 }
19068
19069 /* We use right shift and left shift to clear the LSB of the address
19070 we jump to instead of using bic, to avoid having to use an extra
19071 register on Thumb-1. */
19072 clearing_reg = XEXP (address, 0);
19073 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19074 emit_insn (gen_rtx_SET (clearing_reg, shift));
19075 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19076 emit_insn (gen_rtx_SET (clearing_reg, shift));
19077
19078 if (clear_callee_saved)
19079 {
19080 rtx push_insn =
19081 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19082 /* Disable frame debug info in push because it needs to be
19083 disabled for pop (see below). */
19084 RTX_FRAME_RELATED_P (push_insn) = 0;
19085
19086 /* Lazy store multiple. */
19087 if (lazy_fpclear)
19088 {
19089 rtx imm;
19090 rtx_insn *add_insn;
19091
19092 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19093 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19094 stack_pointer_rtx, imm));
19095 /* If we have the frame pointer, then it will be the
19096 CFA reg. Otherwise, the stack pointer is the CFA
19097 reg, so we need to emit a CFA adjust. */
19098 if (!frame_pointer_needed)
19099 arm_add_cfa_adjust_cfa_note (add_insn,
19100 - lazy_store_stack_frame_size,
19101 stack_pointer_rtx,
19102 stack_pointer_rtx);
19103 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19104 }
19105 /* Save VFP callee-saved registers. */
19106 else
19107 {
19108 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19109 (max_fp_regno - D7_VFP_REGNUM) / 2);
19110 /* Disable frame debug info in push because it needs to be
19111 disabled for vpop (see below). */
19112 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19113 }
19114 }
19115
19116 /* Clear caller-saved registers that leak before doing a non-secure
19117 call. */
19118 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19119 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19120 NUM_ARG_REGS, ip_reg, clearing_reg);
19121
19122 seq = get_insns ();
19123 end_sequence ();
19124 emit_insn_before (seq, insn);
19125
19126 if (TARGET_HAVE_FPCXT_CMSE)
19127 {
19128 rtx_insn *last, *pop_insn, *after = insn;
19129
19130 start_sequence ();
19131
19132 /* Lazy load multiple done as part of libcall in Armv8-M. */
19133 if (lazy_fpclear)
19134 {
19135 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19136 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19137 rtx_insn *add_insn =
19138 emit_insn (gen_addsi3 (stack_pointer_rtx,
19139 stack_pointer_rtx, imm));
19140 if (!frame_pointer_needed)
19141 arm_add_cfa_adjust_cfa_note (add_insn,
19142 lazy_store_stack_frame_size,
19143 stack_pointer_rtx,
19144 stack_pointer_rtx);
19145 }
19146 /* Restore VFP callee-saved registers. */
19147 else
19148 {
19149 int nb_callee_saved_vfp_regs =
19150 (max_fp_regno - D7_VFP_REGNUM) / 2;
19151 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19152 nb_callee_saved_vfp_regs,
19153 stack_pointer_rtx);
19154 /* Disable frame debug info in vpop because the SP adjustment
19155 is made using a CFA adjustment note while CFA used is
19156 sometimes R7. This then causes an assert failure in the
19157 CFI note creation code. */
19158 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19159 }
19160
19161 arm_emit_multi_reg_pop (callee_saved_mask);
19162 pop_insn = get_last_insn ();
19163
19164 /* Disable frame debug info in pop because they reset the state
19165 of popped registers to what it was at the beginning of the
19166 function, before the prologue. This leads to incorrect state
19167 when doing the pop after the nonsecure call for registers that
19168 are pushed both in prologue and before the nonsecure call.
19169
19170 It also occasionally triggers an assert failure in CFI note
19171 creation code when there are two codepaths to the epilogue,
19172 one of which does not go through the nonsecure call.
19173 Obviously this means that debugging between the push and pop is
19174 not reliable. */
19175 RTX_FRAME_RELATED_P (pop_insn) = 0;
19176
19177 seq = get_insns ();
19178 last = get_last_insn ();
19179 end_sequence ();
19180
19181 emit_insn_after (seq, after);
19182
19183 /* Skip pop we have just inserted after nonsecure call, we know
19184 it does not contain a nonsecure call. */
19185 insn = last;
19186 }
19187 }
19188 }
19189 }
19190
19191 /* Rewrite move insn into subtract of 0 if the condition codes will
19192 be useful in the next conditional jump insn. */
19193
19194 static void
19195 thumb1_reorg (void)
19196 {
19197 basic_block bb;
19198
19199 FOR_EACH_BB_FN (bb, cfun)
19200 {
19201 rtx dest, src;
19202 rtx cmp, op0, op1, set = NULL;
19203 rtx_insn *prev, *insn = BB_END (bb);
19204 bool insn_clobbered = false;
19205
19206 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19207 insn = PREV_INSN (insn);
19208
19209 /* Find the last cbranchsi4_insn in basic block BB. */
19210 if (insn == BB_HEAD (bb)
19211 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19212 continue;
19213
19214 /* Get the register with which we are comparing. */
19215 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19216 op0 = XEXP (cmp, 0);
19217 op1 = XEXP (cmp, 1);
19218
19219 /* Check that comparison is against ZERO. */
19220 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19221 continue;
19222
19223 /* Find the first flag setting insn before INSN in basic block BB. */
19224 gcc_assert (insn != BB_HEAD (bb));
19225 for (prev = PREV_INSN (insn);
19226 (!insn_clobbered
19227 && prev != BB_HEAD (bb)
19228 && (NOTE_P (prev)
19229 || DEBUG_INSN_P (prev)
19230 || ((set = single_set (prev)) != NULL
19231 && get_attr_conds (prev) == CONDS_NOCOND)));
19232 prev = PREV_INSN (prev))
19233 {
19234 if (reg_set_p (op0, prev))
19235 insn_clobbered = true;
19236 }
19237
19238 /* Skip if op0 is clobbered by insn other than prev. */
19239 if (insn_clobbered)
19240 continue;
19241
19242 if (!set)
19243 continue;
19244
19245 dest = SET_DEST (set);
19246 src = SET_SRC (set);
19247 if (!low_register_operand (dest, SImode)
19248 || !low_register_operand (src, SImode))
19249 continue;
19250
19251 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19252 in INSN. Both src and dest of the move insn are checked. */
19253 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19254 {
19255 dest = copy_rtx (dest);
19256 src = copy_rtx (src);
19257 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19258 PATTERN (prev) = gen_rtx_SET (dest, src);
19259 INSN_CODE (prev) = -1;
19260 /* Set test register in INSN to dest. */
19261 XEXP (cmp, 0) = copy_rtx (dest);
19262 INSN_CODE (insn) = -1;
19263 }
19264 }
19265 }
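/* Concretely, the transformation above turns RTL of the form

       (set (reg:SI Rd) (reg:SI Rs))
       ...
       (cbranchsi4_insn comparing Rd or Rs against zero)

   into

       (set (reg:SI Rd) (minus:SI (reg:SI Rs) (const_int 0)))

   with the comparison in the branch redirected to Rd, the idea being that
   the move can then be emitted as a flag-setting SUBS so the conditional
   branch can reuse the flags instead of materializing a separate compare.  */
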
19266
19267 /* Convert instructions to their cc-clobbering variant if possible, since
19268 that allows us to use smaller encodings. */
19269
19270 static void
19271 thumb2_reorg (void)
19272 {
19273 basic_block bb;
19274 regset_head live;
19275
19276 INIT_REG_SET (&live);
19277
19278 /* We are freeing block_for_insn in the toplev to keep compatibility
19279 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19280 compute_bb_for_insn ();
19281 df_analyze ();
19282
19283 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19284
19285 FOR_EACH_BB_FN (bb, cfun)
19286 {
19287 if ((current_tune->disparage_flag_setting_t16_encodings
19288 == tune_params::DISPARAGE_FLAGS_ALL)
19289 && optimize_bb_for_speed_p (bb))
19290 continue;
19291
19292 rtx_insn *insn;
19293 Convert_Action action = SKIP;
19294 Convert_Action action_for_partial_flag_setting
19295 = ((current_tune->disparage_flag_setting_t16_encodings
19296 != tune_params::DISPARAGE_FLAGS_NEITHER)
19297 && optimize_bb_for_speed_p (bb))
19298 ? SKIP : CONV;
19299
19300 COPY_REG_SET (&live, DF_LR_OUT (bb));
19301 df_simulate_initialize_backwards (bb, &live);
19302 FOR_BB_INSNS_REVERSE (bb, insn)
19303 {
19304 if (NONJUMP_INSN_P (insn)
19305 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19306 && GET_CODE (PATTERN (insn)) == SET)
19307 {
19308 action = SKIP;
19309 rtx pat = PATTERN (insn);
19310 rtx dst = XEXP (pat, 0);
19311 rtx src = XEXP (pat, 1);
19312 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19313
19314 if (UNARY_P (src) || BINARY_P (src))
19315 op0 = XEXP (src, 0);
19316
19317 if (BINARY_P (src))
19318 op1 = XEXP (src, 1);
19319
19320 if (low_register_operand (dst, SImode))
19321 {
19322 switch (GET_CODE (src))
19323 {
19324 case PLUS:
19325 /* Adding two registers and storing the result
19326 in the first source is already a 16-bit
19327 operation. */
19328 if (rtx_equal_p (dst, op0)
19329 && register_operand (op1, SImode))
19330 break;
19331
19332 if (low_register_operand (op0, SImode))
19333 {
19334 /* ADDS <Rd>,<Rn>,<Rm> */
19335 if (low_register_operand (op1, SImode))
19336 action = CONV;
19337 /* ADDS <Rdn>,#<imm8> */
19338 /* SUBS <Rdn>,#<imm8> */
19339 else if (rtx_equal_p (dst, op0)
19340 && CONST_INT_P (op1)
19341 && IN_RANGE (INTVAL (op1), -255, 255))
19342 action = CONV;
19343 /* ADDS <Rd>,<Rn>,#<imm3> */
19344 /* SUBS <Rd>,<Rn>,#<imm3> */
19345 else if (CONST_INT_P (op1)
19346 && IN_RANGE (INTVAL (op1), -7, 7))
19347 action = CONV;
19348 }
19349 /* ADCS <Rd>, <Rn> */
19350 else if (GET_CODE (XEXP (src, 0)) == PLUS
19351 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19352 && low_register_operand (XEXP (XEXP (src, 0), 1),
19353 SImode)
19354 && COMPARISON_P (op1)
19355 && cc_register (XEXP (op1, 0), VOIDmode)
19356 && maybe_get_arm_condition_code (op1) == ARM_CS
19357 && XEXP (op1, 1) == const0_rtx)
19358 action = CONV;
19359 break;
19360
19361 case MINUS:
19362 /* RSBS <Rd>,<Rn>,#0
19363 Not handled here: see NEG below. */
19364 /* SUBS <Rd>,<Rn>,#<imm3>
19365 SUBS <Rdn>,#<imm8>
19366 Not handled here: see PLUS above. */
19367 /* SUBS <Rd>,<Rn>,<Rm> */
19368 if (low_register_operand (op0, SImode)
19369 && low_register_operand (op1, SImode))
19370 action = CONV;
19371 break;
19372
19373 case MULT:
19374 /* MULS <Rdm>,<Rn>,<Rdm>
19375 As an exception to the rule, this is only used
19376 when optimizing for size since MULS is slow on all
19377 known implementations. We do not even want to use
19378 MULS in cold code, if optimizing for speed, so we
19379 test the global flag here. */
19380 if (!optimize_size)
19381 break;
19382 /* Fall through. */
19383 case AND:
19384 case IOR:
19385 case XOR:
19386 /* ANDS <Rdn>,<Rm> */
19387 if (rtx_equal_p (dst, op0)
19388 && low_register_operand (op1, SImode))
19389 action = action_for_partial_flag_setting;
19390 else if (rtx_equal_p (dst, op1)
19391 && low_register_operand (op0, SImode))
19392 action = action_for_partial_flag_setting == SKIP
19393 ? SKIP : SWAP_CONV;
19394 break;
19395
19396 case ASHIFTRT:
19397 case ASHIFT:
19398 case LSHIFTRT:
19399 /* ASRS <Rdn>,<Rm> */
19400 /* LSRS <Rdn>,<Rm> */
19401 /* LSLS <Rdn>,<Rm> */
19402 if (rtx_equal_p (dst, op0)
19403 && low_register_operand (op1, SImode))
19404 action = action_for_partial_flag_setting;
19405 /* ASRS <Rd>,<Rm>,#<imm5> */
19406 /* LSRS <Rd>,<Rm>,#<imm5> */
19407 /* LSLS <Rd>,<Rm>,#<imm5> */
19408 else if (low_register_operand (op0, SImode)
19409 && CONST_INT_P (op1)
19410 && IN_RANGE (INTVAL (op1), 0, 31))
19411 action = action_for_partial_flag_setting;
19412 break;
19413
19414 case ROTATERT:
19415 /* RORS <Rdn>,<Rm> */
19416 if (rtx_equal_p (dst, op0)
19417 && low_register_operand (op1, SImode))
19418 action = action_for_partial_flag_setting;
19419 break;
19420
19421 case NOT:
19422 /* MVNS <Rd>,<Rm> */
19423 if (low_register_operand (op0, SImode))
19424 action = action_for_partial_flag_setting;
19425 break;
19426
19427 case NEG:
19428 	    /* NEGS <Rd>,<Rm>  (a.k.a. RSBS)  */
19429 if (low_register_operand (op0, SImode))
19430 action = CONV;
19431 break;
19432
19433 case CONST_INT:
19434 /* MOVS <Rd>,#<imm8> */
19435 if (CONST_INT_P (src)
19436 && IN_RANGE (INTVAL (src), 0, 255))
19437 action = action_for_partial_flag_setting;
19438 break;
19439
19440 case REG:
19441 /* MOVS and MOV<c> with registers have different
19442 encodings, so are not relevant here. */
19443 break;
19444
19445 default:
19446 break;
19447 }
19448 }
19449
19450 if (action != SKIP)
19451 {
19452 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19453 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19454 rtvec vec;
19455
19456 if (action == SWAP_CONV)
19457 {
19458 src = copy_rtx (src);
19459 XEXP (src, 0) = op1;
19460 XEXP (src, 1) = op0;
19461 pat = gen_rtx_SET (dst, src);
19462 vec = gen_rtvec (2, pat, clobber);
19463 }
19464 else /* action == CONV */
19465 vec = gen_rtvec (2, pat, clobber);
19466
19467 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19468 INSN_CODE (insn) = -1;
19469 }
19470 }
19471
19472 if (NONDEBUG_INSN_P (insn))
19473 df_simulate_one_insn_backwards (bb, insn, &live);
19474 }
19475 }
19476
19477 CLEAR_REG_SET (&live);
19478 }
19479
19480 /* GCC puts the pool in the wrong place for ARM, since we can only
19481 load addresses a limited distance around the pc. We do some
19482 special munging to move the constant pool values to the correct
19483 point in the code. */
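/* As an illustrative sketch (the label name below is made up), an
   out-of-range constant ends up being loaded PC-relative from a nearby
   minipool:

	ldr	r0, .Lminipool_entry	@ must stay within the load's range
	...
	.Lminipool_entry:
	.word	0x12345678

   The pass below records each such reference as a "fix" and then places
   the pools, plus any barriers needed to branch around them, so that
   every reference remains in range.  */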
19484 static void
19485 arm_reorg (void)
19486 {
19487 rtx_insn *insn;
19488 HOST_WIDE_INT address = 0;
19489 Mfix * fix;
19490
19491 if (use_cmse)
19492 cmse_nonsecure_call_inline_register_clear ();
19493
19494 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19495 if (cfun->is_thunk)
19496 ;
19497 else if (TARGET_THUMB1)
19498 thumb1_reorg ();
19499 else if (TARGET_THUMB2)
19500 thumb2_reorg ();
19501
19502 /* Ensure all insns that must be split have been split at this point.
19503 Otherwise, the pool placement code below may compute incorrect
19504 insn lengths. Note that when optimizing, all insns have already
19505 been split at this point. */
19506 if (!optimize)
19507 split_all_insns_noflow ();
19508
19509   /* Literal pools have been disabled; none should be needed now, so make
19510      sure we never attempt to create one.  */
19511 if (arm_disable_literal_pool)
19512 return ;
19513
19514 minipool_fix_head = minipool_fix_tail = NULL;
19515
19516 /* The first insn must always be a note, or the code below won't
19517 scan it properly. */
19518 insn = get_insns ();
19519 gcc_assert (NOTE_P (insn));
19520 minipool_pad = 0;
19521
19522 /* Scan all the insns and record the operands that will need fixing. */
19523 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19524 {
19525 if (BARRIER_P (insn))
19526 push_minipool_barrier (insn, address);
19527 else if (INSN_P (insn))
19528 {
19529 rtx_jump_table_data *table;
19530
19531 note_invalid_constants (insn, address, true);
19532 address += get_attr_length (insn);
19533
19534 /* If the insn is a vector jump, add the size of the table
19535 and skip the table. */
19536 if (tablejump_p (insn, NULL, &table))
19537 {
19538 address += get_jump_table_size (table);
19539 insn = table;
19540 }
19541 }
19542 else if (LABEL_P (insn))
19543 /* Add the worst-case padding due to alignment. We don't add
19544 the _current_ padding because the minipool insertions
19545 themselves might change it. */
19546 address += get_label_padding (insn);
19547 }
19548
19549 fix = minipool_fix_head;
19550
19551 /* Now scan the fixups and perform the required changes. */
19552 while (fix)
19553 {
19554 Mfix * ftmp;
19555 Mfix * fdel;
19556 Mfix * last_added_fix;
19557 Mfix * last_barrier = NULL;
19558 Mfix * this_fix;
19559
19560 /* Skip any further barriers before the next fix. */
19561 while (fix && BARRIER_P (fix->insn))
19562 fix = fix->next;
19563
19564 /* No more fixes. */
19565 if (fix == NULL)
19566 break;
19567
19568 last_added_fix = NULL;
19569
19570 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19571 {
19572 if (BARRIER_P (ftmp->insn))
19573 {
19574 if (ftmp->address >= minipool_vector_head->max_address)
19575 break;
19576
19577 last_barrier = ftmp;
19578 }
19579 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19580 break;
19581
19582 last_added_fix = ftmp; /* Keep track of the last fix added. */
19583 }
19584
19585 /* If we found a barrier, drop back to that; any fixes that we
19586 could have reached but come after the barrier will now go in
19587 the next mini-pool. */
19588 if (last_barrier != NULL)
19589 {
19590 /* Reduce the refcount for those fixes that won't go into this
19591 pool after all. */
19592 for (fdel = last_barrier->next;
19593 fdel && fdel != ftmp;
19594 fdel = fdel->next)
19595 {
19596 fdel->minipool->refcount--;
19597 fdel->minipool = NULL;
19598 }
19599
19600 ftmp = last_barrier;
19601 }
19602 else
19603 {
19604 	  /* ftmp is the first fix that we can't fit into this pool and
19605 	     there are no natural barriers that we could use.  Insert a
19606 new barrier in the code somewhere between the previous
19607 fix and this one, and arrange to jump around it. */
19608 HOST_WIDE_INT max_address;
19609
19610 /* The last item on the list of fixes must be a barrier, so
19611 we can never run off the end of the list of fixes without
19612 last_barrier being set. */
19613 gcc_assert (ftmp);
19614
19615 max_address = minipool_vector_head->max_address;
19616 /* Check that there isn't another fix that is in range that
19617 we couldn't fit into this pool because the pool was
19618 already too large: we need to put the pool before such an
19619 instruction. The pool itself may come just after the
19620 fix because create_fix_barrier also allows space for a
19621 jump instruction. */
19622 if (ftmp->address < max_address)
19623 max_address = ftmp->address + 1;
19624
19625 last_barrier = create_fix_barrier (last_added_fix, max_address);
19626 }
19627
19628 assign_minipool_offsets (last_barrier);
19629
19630 while (ftmp)
19631 {
19632 if (!BARRIER_P (ftmp->insn)
19633 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19634 == NULL))
19635 break;
19636
19637 ftmp = ftmp->next;
19638 }
19639
19640 /* Scan over the fixes we have identified for this pool, fixing them
19641 up and adding the constants to the pool itself. */
19642 for (this_fix = fix; this_fix && ftmp != this_fix;
19643 this_fix = this_fix->next)
19644 if (!BARRIER_P (this_fix->insn))
19645 {
19646 rtx addr
19647 = plus_constant (Pmode,
19648 gen_rtx_LABEL_REF (VOIDmode,
19649 minipool_vector_label),
19650 this_fix->minipool->offset);
19651 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19652 }
19653
19654 dump_minipool (last_barrier->insn);
19655 fix = ftmp;
19656 }
19657
19658 /* From now on we must synthesize any constants that we can't handle
19659 directly. This can happen if the RTL gets split during final
19660 instruction generation. */
19661 cfun->machine->after_arm_reorg = 1;
19662
19663 /* Free the minipool memory. */
19664 obstack_free (&minipool_obstack, minipool_startobj);
19665 }
19666 \f
19667 /* Routines to output assembly language. */
19668
19669 /* Return string representation of passed in real value. */
19670 static const char *
19671 fp_const_from_val (REAL_VALUE_TYPE *r)
19672 {
19673 if (!fp_consts_inited)
19674 init_fp_table ();
19675
19676 gcc_assert (real_equal (r, &value_fp0));
19677 return "0";
19678 }
19679
19680 /* OPERANDS[0] is the entire list of insns that constitute pop,
19681 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
19682 is in the list, UPDATE is true iff the list contains explicit
19683 update of base register. */
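/* For example (purely illustrative register choices): restoring r4, r5 and
   the return address with an SP writeback produces "pop {r4, r5, pc}",
   while the same registers loaded through another base register without
   writeback come out as "ldm rN, {r4, r5, pc}".  */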
19684 void
19685 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19686 bool update)
19687 {
19688 int i;
19689 char pattern[100];
19690 int offset;
19691 const char *conditional;
19692 int num_saves = XVECLEN (operands[0], 0);
19693 unsigned int regno;
19694 unsigned int regno_base = REGNO (operands[1]);
19695 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19696
19697 offset = 0;
19698 offset += update ? 1 : 0;
19699 offset += return_pc ? 1 : 0;
19700
19701 /* Is the base register in the list? */
19702 for (i = offset; i < num_saves; i++)
19703 {
19704 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19705 /* If SP is in the list, then the base register must be SP. */
19706 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19707 /* If base register is in the list, there must be no explicit update. */
19708 if (regno == regno_base)
19709 gcc_assert (!update);
19710 }
19711
19712 conditional = reverse ? "%?%D0" : "%?%d0";
19713 /* Can't use POP if returning from an interrupt. */
19714 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19715 sprintf (pattern, "pop%s\t{", conditional);
19716 else
19717 {
19718 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19719          It's just a convention; their semantics are identical.  */
19720 if (regno_base == SP_REGNUM)
19721 sprintf (pattern, "ldmfd%s\t", conditional);
19722 else if (update)
19723 sprintf (pattern, "ldmia%s\t", conditional);
19724 else
19725 sprintf (pattern, "ldm%s\t", conditional);
19726
19727 strcat (pattern, reg_names[regno_base]);
19728 if (update)
19729 strcat (pattern, "!, {");
19730 else
19731 strcat (pattern, ", {");
19732 }
19733
19734 /* Output the first destination register. */
19735 strcat (pattern,
19736 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19737
19738 /* Output the rest of the destination registers. */
19739 for (i = offset + 1; i < num_saves; i++)
19740 {
19741 strcat (pattern, ", ");
19742 strcat (pattern,
19743 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19744 }
19745
19746 strcat (pattern, "}");
19747
19748 if (interrupt_p && return_pc)
19749 strcat (pattern, "^");
19750
19751 output_asm_insn (pattern, &cond);
19752 }
19753
19754
19755 /* Output the assembly for a store multiple. */
19756
19757 const char *
19758 vfp_output_vstmd (rtx * operands)
19759 {
19760 char pattern[100];
19761 int p;
19762 int base;
19763 int i;
19764 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19765 ? XEXP (operands[0], 0)
19766 : XEXP (XEXP (operands[0], 0), 0);
19767 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19768
19769 if (push_p)
19770 strcpy (pattern, "vpush%?.64\t{%P1");
19771 else
19772 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19773
19774 p = strlen (pattern);
19775
19776 gcc_assert (REG_P (operands[1]));
19777
19778 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19779 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19780 {
19781 p += sprintf (&pattern[p], ", d%d", base + i);
19782 }
19783 strcpy (&pattern[p], "}");
19784
19785 output_asm_insn (pattern, operands);
19786 return "";
19787 }
19788
19789
19790 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19791 number of bytes pushed. */
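/* For instance, a call with BASE_REG naming d8 and COUNT == 4 emits a single
   store-multiple of d8-d11, drops SP by 32 bytes and returns 32.  */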
19792
19793 static int
19794 vfp_emit_fstmd (int base_reg, int count)
19795 {
19796 rtx par;
19797 rtx dwarf;
19798 rtx tmp, reg;
19799 int i;
19800
19801 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19802 register pairs are stored by a store multiple insn. We avoid this
19803 by pushing an extra pair. */
19804 if (count == 2 && !arm_arch6)
19805 {
19806 if (base_reg == LAST_VFP_REGNUM - 3)
19807 base_reg -= 2;
19808 count++;
19809 }
19810
19811 /* FSTMD may not store more than 16 doubleword registers at once. Split
19812 larger stores into multiple parts (up to a maximum of two, in
19813 practice). */
19814 if (count > 16)
19815 {
19816 int saved;
19817 /* NOTE: base_reg is an internal register number, so each D register
19818 counts as 2. */
19819 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19820 saved += vfp_emit_fstmd (base_reg, 16);
19821 return saved;
19822 }
19823
19824 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19825 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19826
19827 reg = gen_rtx_REG (DFmode, base_reg);
19828 base_reg += 2;
19829
19830 XVECEXP (par, 0, 0)
19831 = gen_rtx_SET (gen_frame_mem
19832 (BLKmode,
19833 gen_rtx_PRE_MODIFY (Pmode,
19834 stack_pointer_rtx,
19835 plus_constant
19836 (Pmode, stack_pointer_rtx,
19837 - (count * 8)))
19838 ),
19839 gen_rtx_UNSPEC (BLKmode,
19840 gen_rtvec (1, reg),
19841 UNSPEC_PUSH_MULT));
19842
19843 tmp = gen_rtx_SET (stack_pointer_rtx,
19844 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19845 RTX_FRAME_RELATED_P (tmp) = 1;
19846 XVECEXP (dwarf, 0, 0) = tmp;
19847
19848 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19849 RTX_FRAME_RELATED_P (tmp) = 1;
19850 XVECEXP (dwarf, 0, 1) = tmp;
19851
19852 for (i = 1; i < count; i++)
19853 {
19854 reg = gen_rtx_REG (DFmode, base_reg);
19855 base_reg += 2;
19856 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19857
19858 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19859 plus_constant (Pmode,
19860 stack_pointer_rtx,
19861 i * 8)),
19862 reg);
19863 RTX_FRAME_RELATED_P (tmp) = 1;
19864 XVECEXP (dwarf, 0, i + 1) = tmp;
19865 }
19866
19867 par = emit_insn (par);
19868 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19869 RTX_FRAME_RELATED_P (par) = 1;
19870
19871 return count * 8;
19872 }
19873
19874 /* Return true if -mcmse has been passed and the function pointed to by ADDR
19875    has the cmse_nonsecure_call attribute; return false otherwise.  */
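/* A sketch of the C-level construct this detects (the typedef name is just
   for illustration):

	typedef void __attribute__ ((cmse_nonsecure_call)) ns_call_t (void);
	ns_call_t *callback;

   With -mcmse, a call through 'callback' carries the attribute on its
   function type and is recognised here.  */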
19876
19877 bool
19878 detect_cmse_nonsecure_call (tree addr)
19879 {
19880 if (!addr)
19881 return FALSE;
19882
19883 tree fntype = TREE_TYPE (addr);
19884 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19885 TYPE_ATTRIBUTES (fntype)))
19886 return TRUE;
19887 return FALSE;
19888 }
19889
19890
19891 /* Emit a call instruction with pattern PAT. ADDR is the address of
19892    the call target.  SIBCALL is true if this is a sibling call.  */
19893
19894 void
19895 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19896 {
19897 rtx insn;
19898
19899 insn = emit_call_insn (pat);
19900
19901 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19902 If the call might use such an entry, add a use of the PIC register
19903 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19904 if (TARGET_VXWORKS_RTP
19905 && flag_pic
19906 && !sibcall
19907 && SYMBOL_REF_P (addr)
19908 && (SYMBOL_REF_DECL (addr)
19909 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19910 : !SYMBOL_REF_LOCAL_P (addr)))
19911 {
19912 require_pic_register (NULL_RTX, false /*compute_now*/);
19913 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19914 }
19915
19916 if (TARGET_FDPIC)
19917 {
19918 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19919 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19920 }
19921
19922 if (TARGET_AAPCS_BASED)
19923 {
19924 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19925 linker. We need to add an IP clobber to allow setting
19926 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19927 is not needed since it's a fixed register. */
19928 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19929 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19930 }
19931 }
19932
19933 /* Output a 'call' insn. */
19934 const char *
19935 output_call (rtx *operands)
19936 {
19937 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19938
19939 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19940 if (REGNO (operands[0]) == LR_REGNUM)
19941 {
19942 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19943 output_asm_insn ("mov%?\t%0, %|lr", operands);
19944 }
19945
19946 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19947
19948 if (TARGET_INTERWORK || arm_arch4t)
19949 output_asm_insn ("bx%?\t%0", operands);
19950 else
19951 output_asm_insn ("mov%?\t%|pc, %0", operands);
19952
19953 return "";
19954 }
19955
19956 /* Output a move from arm registers to arm registers of a long double.
19957 OPERANDS[0] is the destination.
19958 OPERANDS[1] is the source. */
19959 const char *
19960 output_mov_long_double_arm_from_arm (rtx *operands)
19961 {
19962 /* We have to be careful here because the two might overlap. */
19963 int dest_start = REGNO (operands[0]);
19964 int src_start = REGNO (operands[1]);
19965 rtx ops[2];
19966 int i;
19967
19968 if (dest_start < src_start)
19969 {
19970 for (i = 0; i < 3; i++)
19971 {
19972 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19973 ops[1] = gen_rtx_REG (SImode, src_start + i);
19974 output_asm_insn ("mov%?\t%0, %1", ops);
19975 }
19976 }
19977 else
19978 {
19979 for (i = 2; i >= 0; i--)
19980 {
19981 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19982 ops[1] = gen_rtx_REG (SImode, src_start + i);
19983 output_asm_insn ("mov%?\t%0, %1", ops);
19984 }
19985 }
19986
19987 return "";
19988 }
19989
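/* Emit DEST = SRC as a pair of 16-bit moves (movw/movt style).  A constant
   has its low 16 bits set first and then, if non-zero, its high 16 bits via
   a ZERO_EXTRACT of DEST; other sources go through HIGH/LO_SUM.  Where
   needed, a REG_EQUAL note recording the full value is attached to the
   final insn.  */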
19990 void
19991 arm_emit_movpair (rtx dest, rtx src)
19992 {
19993 /* If the src is an immediate, simplify it. */
19994 if (CONST_INT_P (src))
19995 {
19996 HOST_WIDE_INT val = INTVAL (src);
19997 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19998 if ((val >> 16) & 0x0000ffff)
19999 {
20000 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
20001 GEN_INT (16)),
20002 GEN_INT ((val >> 16) & 0x0000ffff));
20003 rtx_insn *insn = get_last_insn ();
20004 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20005 }
20006 return;
20007 }
20008 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20009 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20010 rtx_insn *insn = get_last_insn ();
20011 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20012 }
20013
20014 /* Output a move between double words. It must be REG<-MEM
20015 or MEM<-REG. */
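/* For instance (illustrative registers), loading the pair starting at r0
   from the address in r2 is emitted as "ldrd r0, [r2]" when LDRD can be used
   (Thumb-2, or an even destination register in ARM state), and as a
   load-multiple of the two registers otherwise.  */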
20016 const char *
20017 output_move_double (rtx *operands, bool emit, int *count)
20018 {
20019 enum rtx_code code0 = GET_CODE (operands[0]);
20020 enum rtx_code code1 = GET_CODE (operands[1]);
20021 rtx otherops[3];
20022 if (count)
20023 *count = 1;
20024
20025 /* The only case when this might happen is when
20026 you are looking at the length of a DImode instruction
20027 that has an invalid constant in it. */
20028 if (code0 == REG && code1 != MEM)
20029 {
20030 gcc_assert (!emit);
20031 *count = 2;
20032 return "";
20033 }
20034
20035 if (code0 == REG)
20036 {
20037 unsigned int reg0 = REGNO (operands[0]);
20038 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20039
20040 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20041
20042 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20043
20044 switch (GET_CODE (XEXP (operands[1], 0)))
20045 {
20046 case REG:
20047
20048 if (emit)
20049 {
20050 if (can_ldrd
20051 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20052 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20053 else
20054 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20055 }
20056 break;
20057
20058 case PRE_INC:
20059 gcc_assert (can_ldrd);
20060 if (emit)
20061 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20062 break;
20063
20064 case PRE_DEC:
20065 if (emit)
20066 {
20067 if (can_ldrd)
20068 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20069 else
20070 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20071 }
20072 break;
20073
20074 case POST_INC:
20075 if (emit)
20076 {
20077 if (can_ldrd)
20078 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20079 else
20080 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20081 }
20082 break;
20083
20084 case POST_DEC:
20085 gcc_assert (can_ldrd);
20086 if (emit)
20087 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20088 break;
20089
20090 case PRE_MODIFY:
20091 case POST_MODIFY:
20092 	  /* Autoincrement addressing modes should never have overlapping
20093 base and destination registers, and overlapping index registers
20094 are already prohibited, so this doesn't need to worry about
20095 fix_cm3_ldrd. */
20096 otherops[0] = operands[0];
20097 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20098 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20099
20100 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20101 {
20102 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20103 {
20104 /* Registers overlap so split out the increment. */
20105 if (emit)
20106 {
20107 gcc_assert (can_ldrd);
20108 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20109 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20110 }
20111 if (count)
20112 *count = 2;
20113 }
20114 else
20115 {
20116 /* Use a single insn if we can.
20117 FIXME: IWMMXT allows offsets larger than ldrd can
20118 handle, fix these up with a pair of ldr. */
20119 if (can_ldrd
20120 && (TARGET_THUMB2
20121 || !CONST_INT_P (otherops[2])
20122 || (INTVAL (otherops[2]) > -256
20123 && INTVAL (otherops[2]) < 256)))
20124 {
20125 if (emit)
20126 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20127 }
20128 else
20129 {
20130 if (emit)
20131 {
20132 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20133 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20134 }
20135 if (count)
20136 *count = 2;
20137
20138 }
20139 }
20140 }
20141 else
20142 {
20143 /* Use a single insn if we can.
20144 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20145 fix these up with a pair of ldr. */
20146 if (can_ldrd
20147 && (TARGET_THUMB2
20148 || !CONST_INT_P (otherops[2])
20149 || (INTVAL (otherops[2]) > -256
20150 && INTVAL (otherops[2]) < 256)))
20151 {
20152 if (emit)
20153 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20154 }
20155 else
20156 {
20157 if (emit)
20158 {
20159 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20160 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20161 }
20162 if (count)
20163 *count = 2;
20164 }
20165 }
20166 break;
20167
20168 case LABEL_REF:
20169 case CONST:
20170 /* We might be able to use ldrd %0, %1 here. However the range is
20171 different to ldr/adr, and it is broken on some ARMv7-M
20172 implementations. */
20173 /* Use the second register of the pair to avoid problematic
20174 overlap. */
20175 otherops[1] = operands[1];
20176 if (emit)
20177 output_asm_insn ("adr%?\t%0, %1", otherops);
20178 operands[1] = otherops[0];
20179 if (emit)
20180 {
20181 if (can_ldrd)
20182 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20183 else
20184 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20185 }
20186
20187 if (count)
20188 *count = 2;
20189 break;
20190
20191 /* ??? This needs checking for thumb2. */
20192 default:
20193 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20194 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20195 {
20196 otherops[0] = operands[0];
20197 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20198 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20199
20200 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20201 {
20202 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20203 {
20204 switch ((int) INTVAL (otherops[2]))
20205 {
20206 case -8:
20207 if (emit)
20208 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20209 return "";
20210 case -4:
20211 if (TARGET_THUMB2)
20212 break;
20213 if (emit)
20214 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20215 return "";
20216 case 4:
20217 if (TARGET_THUMB2)
20218 break;
20219 if (emit)
20220 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20221 return "";
20222 }
20223 }
20224 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20225 operands[1] = otherops[0];
20226 if (can_ldrd
20227 && (REG_P (otherops[2])
20228 || TARGET_THUMB2
20229 || (CONST_INT_P (otherops[2])
20230 && INTVAL (otherops[2]) > -256
20231 && INTVAL (otherops[2]) < 256)))
20232 {
20233 if (reg_overlap_mentioned_p (operands[0],
20234 otherops[2]))
20235 {
20236 /* Swap base and index registers over to
20237 avoid a conflict. */
20238 std::swap (otherops[1], otherops[2]);
20239 }
20240 /* If both registers conflict, it will usually
20241 have been fixed by a splitter. */
20242 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20243 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20244 {
20245 if (emit)
20246 {
20247 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20248 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20249 }
20250 if (count)
20251 *count = 2;
20252 }
20253 else
20254 {
20255 otherops[0] = operands[0];
20256 if (emit)
20257 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20258 }
20259 return "";
20260 }
20261
20262 if (CONST_INT_P (otherops[2]))
20263 {
20264 if (emit)
20265 {
20266 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20267 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20268 else
20269 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20270 }
20271 }
20272 else
20273 {
20274 if (emit)
20275 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20276 }
20277 }
20278 else
20279 {
20280 if (emit)
20281 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20282 }
20283
20284 if (count)
20285 *count = 2;
20286
20287 if (can_ldrd)
20288 return "ldrd%?\t%0, [%1]";
20289
20290 return "ldmia%?\t%1, %M0";
20291 }
20292 else
20293 {
20294 otherops[1] = adjust_address (operands[1], SImode, 4);
20295 /* Take care of overlapping base/data reg. */
20296 if (reg_mentioned_p (operands[0], operands[1]))
20297 {
20298 if (emit)
20299 {
20300 output_asm_insn ("ldr%?\t%0, %1", otherops);
20301 output_asm_insn ("ldr%?\t%0, %1", operands);
20302 }
20303 if (count)
20304 *count = 2;
20305
20306 }
20307 else
20308 {
20309 if (emit)
20310 {
20311 output_asm_insn ("ldr%?\t%0, %1", operands);
20312 output_asm_insn ("ldr%?\t%0, %1", otherops);
20313 }
20314 if (count)
20315 *count = 2;
20316 }
20317 }
20318 }
20319 }
20320 else
20321 {
20322 /* Constraints should ensure this. */
20323 gcc_assert (code0 == MEM && code1 == REG);
20324 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20325 || (TARGET_ARM && TARGET_LDRD));
20326
20327 /* For TARGET_ARM the first source register of an STRD
20328 must be even. This is usually the case for double-word
20329 values but user assembly constraints can force an odd
20330 starting register. */
20331 bool allow_strd = TARGET_LDRD
20332 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20333 switch (GET_CODE (XEXP (operands[0], 0)))
20334 {
20335 case REG:
20336 if (emit)
20337 {
20338 if (allow_strd)
20339 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20340 else
20341 output_asm_insn ("stm%?\t%m0, %M1", operands);
20342 }
20343 break;
20344
20345 case PRE_INC:
20346 gcc_assert (allow_strd);
20347 if (emit)
20348 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20349 break;
20350
20351 case PRE_DEC:
20352 if (emit)
20353 {
20354 if (allow_strd)
20355 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20356 else
20357 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20358 }
20359 break;
20360
20361 case POST_INC:
20362 if (emit)
20363 {
20364 if (allow_strd)
20365 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20366 else
20367 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20368 }
20369 break;
20370
20371 case POST_DEC:
20372 gcc_assert (allow_strd);
20373 if (emit)
20374 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20375 break;
20376
20377 case PRE_MODIFY:
20378 case POST_MODIFY:
20379 otherops[0] = operands[1];
20380 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20381 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20382
20383 /* IWMMXT allows offsets larger than strd can handle,
20384 fix these up with a pair of str. */
20385 if (!TARGET_THUMB2
20386 && CONST_INT_P (otherops[2])
20387 && (INTVAL(otherops[2]) <= -256
20388 || INTVAL(otherops[2]) >= 256))
20389 {
20390 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20391 {
20392 if (emit)
20393 {
20394 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20395 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20396 }
20397 if (count)
20398 *count = 2;
20399 }
20400 else
20401 {
20402 if (emit)
20403 {
20404 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20405 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20406 }
20407 if (count)
20408 *count = 2;
20409 }
20410 }
20411 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20412 {
20413 if (emit)
20414 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20415 }
20416 else
20417 {
20418 if (emit)
20419 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20420 }
20421 break;
20422
20423 case PLUS:
20424 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20425 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20426 {
20427 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20428 {
20429 case -8:
20430 if (emit)
20431 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20432 return "";
20433
20434 case -4:
20435 if (TARGET_THUMB2)
20436 break;
20437 if (emit)
20438 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20439 return "";
20440
20441 case 4:
20442 if (TARGET_THUMB2)
20443 break;
20444 if (emit)
20445 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20446 return "";
20447 }
20448 }
20449 if (allow_strd
20450 && (REG_P (otherops[2])
20451 || TARGET_THUMB2
20452 || (CONST_INT_P (otherops[2])
20453 && INTVAL (otherops[2]) > -256
20454 && INTVAL (otherops[2]) < 256)))
20455 {
20456 otherops[0] = operands[1];
20457 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20458 if (emit)
20459 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20460 return "";
20461 }
20462 /* Fall through */
20463
20464 default:
20465 otherops[0] = adjust_address (operands[0], SImode, 4);
20466 otherops[1] = operands[1];
20467 if (emit)
20468 {
20469 output_asm_insn ("str%?\t%1, %0", operands);
20470 output_asm_insn ("str%?\t%H1, %0", otherops);
20471 }
20472 if (count)
20473 *count = 2;
20474 }
20475 }
20476
20477 return "";
20478 }
20479
20480 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20481 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20482
20483 const char *
20484 output_move_quad (rtx *operands)
20485 {
20486 if (REG_P (operands[0]))
20487 {
20488 /* Load, or reg->reg move. */
20489
20490 if (MEM_P (operands[1]))
20491 {
20492 switch (GET_CODE (XEXP (operands[1], 0)))
20493 {
20494 case REG:
20495 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20496 break;
20497
20498 case LABEL_REF:
20499 case CONST:
20500 output_asm_insn ("adr%?\t%0, %1", operands);
20501 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20502 break;
20503
20504 default:
20505 gcc_unreachable ();
20506 }
20507 }
20508 else
20509 {
20510 rtx ops[2];
20511 int dest, src, i;
20512
20513 gcc_assert (REG_P (operands[1]));
20514
20515 dest = REGNO (operands[0]);
20516 src = REGNO (operands[1]);
20517
20518 /* This seems pretty dumb, but hopefully GCC won't try to do it
20519 very often. */
20520 if (dest < src)
20521 for (i = 0; i < 4; i++)
20522 {
20523 ops[0] = gen_rtx_REG (SImode, dest + i);
20524 ops[1] = gen_rtx_REG (SImode, src + i);
20525 output_asm_insn ("mov%?\t%0, %1", ops);
20526 }
20527 else
20528 for (i = 3; i >= 0; i--)
20529 {
20530 ops[0] = gen_rtx_REG (SImode, dest + i);
20531 ops[1] = gen_rtx_REG (SImode, src + i);
20532 output_asm_insn ("mov%?\t%0, %1", ops);
20533 }
20534 }
20535 }
20536 else
20537 {
20538 gcc_assert (MEM_P (operands[0]));
20539 gcc_assert (REG_P (operands[1]));
20540 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20541
20542 switch (GET_CODE (XEXP (operands[0], 0)))
20543 {
20544 case REG:
20545 output_asm_insn ("stm%?\t%m0, %M1", operands);
20546 break;
20547
20548 default:
20549 gcc_unreachable ();
20550 }
20551 }
20552
20553 return "";
20554 }
20555
20556 /* Output a VFP load or store instruction. */
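/* Typical outputs (illustrative operands): "vldr.64 d8, [r1]" for a
   double-precision load, "vstr.32 s0, [r2]" for a single-precision store,
   and "vldmia.64 r3!, {d8}" for a post-incremented load.  */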
20557
20558 const char *
20559 output_move_vfp (rtx *operands)
20560 {
20561 rtx reg, mem, addr, ops[2];
20562 int load = REG_P (operands[0]);
20563 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20564 int sp = (!TARGET_VFP_FP16INST
20565 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20566 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20567 const char *templ;
20568 char buff[50];
20569 machine_mode mode;
20570
20571 reg = operands[!load];
20572 mem = operands[load];
20573
20574 mode = GET_MODE (reg);
20575
20576 gcc_assert (REG_P (reg));
20577 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20578 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20579 || mode == SFmode
20580 || mode == DFmode
20581 || mode == HImode
20582 || mode == SImode
20583 || mode == DImode
20584 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20585 gcc_assert (MEM_P (mem));
20586
20587 addr = XEXP (mem, 0);
20588
20589 switch (GET_CODE (addr))
20590 {
20591 case PRE_DEC:
20592 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20593 ops[0] = XEXP (addr, 0);
20594 ops[1] = reg;
20595 break;
20596
20597 case POST_INC:
20598 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20599 ops[0] = XEXP (addr, 0);
20600 ops[1] = reg;
20601 break;
20602
20603 default:
20604 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20605 ops[0] = reg;
20606 ops[1] = mem;
20607 break;
20608 }
20609
20610 sprintf (buff, templ,
20611 load ? "ld" : "st",
20612 dp ? "64" : sp ? "32" : "16",
20613 dp ? "P" : "",
20614 integer_p ? "\t%@ int" : "");
20615 output_asm_insn (buff, ops);
20616
20617 return "";
20618 }
20619
20620 /* Output a Neon double-word or quad-word load or store, or a load
20621 or store for larger structure modes.
20622
20623    WARNING: The ordering of elements is weird in big-endian mode,
20624    because the EABI requires that vectors stored in memory appear
20625    as though they were stored by a VSTM instruction.
20626 GCC RTL defines element ordering based on in-memory order.
20627 This can be different from the architectural ordering of elements
20628 within a NEON register. The intrinsics defined in arm_neon.h use the
20629 NEON register element ordering, not the GCC RTL element ordering.
20630
20631    For example, the in-memory ordering of a big-endian quadword
20632 vector with 16-bit elements when stored from register pair {d0,d1}
20633 will be (lowest address first, d0[N] is NEON register element N):
20634
20635 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20636
20637 When necessary, quadword registers (dN, dN+1) are moved to ARM
20638 registers from rN in the order:
20639
20640 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20641
20642 So that STM/LDM can be used on vectors in ARM registers, and the
20643 same memory layout will result as if VSTM/VLDM were used.
20644
20645 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20646 possible, which allows use of appropriate alignment tags.
20647 Note that the choice of "64" is independent of the actual vector
20648 element size; this size simply ensures that the behavior is
20649 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20650
20651 Due to limitations of those instructions, use of VST1.64/VLD1.64
20652 is not possible if:
20653 - the address contains PRE_DEC, or
20654 - the mode refers to more than 4 double-word registers
20655
20656 In those cases, it would be possible to replace VSTM/VLDM by a
20657 sequence of instructions; this is not currently implemented since
20658 this is not certain to actually improve performance. */
20659
20660 const char *
20661 output_move_neon (rtx *operands)
20662 {
20663 rtx reg, mem, addr, ops[2];
20664 int regno, nregs, load = REG_P (operands[0]);
20665 const char *templ;
20666 char buff[50];
20667 machine_mode mode;
20668
20669 reg = operands[!load];
20670 mem = operands[load];
20671
20672 mode = GET_MODE (reg);
20673
20674 gcc_assert (REG_P (reg));
20675 regno = REGNO (reg);
20676 nregs = REG_NREGS (reg) / 2;
20677 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20678 || NEON_REGNO_OK_FOR_QUAD (regno));
20679 gcc_assert (VALID_NEON_DREG_MODE (mode)
20680 || VALID_NEON_QREG_MODE (mode)
20681 || VALID_NEON_STRUCT_MODE (mode));
20682 gcc_assert (MEM_P (mem));
20683
20684 addr = XEXP (mem, 0);
20685
20686 /* Strip off const from addresses like (const (plus (...))). */
20687 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20688 addr = XEXP (addr, 0);
20689
20690 switch (GET_CODE (addr))
20691 {
20692 case POST_INC:
20693 /* We have to use vldm / vstm for too-large modes. */
20694 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20695 {
20696 templ = "v%smia%%?\t%%0!, %%h1";
20697 ops[0] = XEXP (addr, 0);
20698 }
20699 else
20700 {
20701 templ = "v%s1.64\t%%h1, %%A0";
20702 ops[0] = mem;
20703 }
20704 ops[1] = reg;
20705 break;
20706
20707 case PRE_DEC:
20708 /* We have to use vldm / vstm in this case, since there is no
20709 pre-decrement form of the vld1 / vst1 instructions. */
20710 templ = "v%smdb%%?\t%%0!, %%h1";
20711 ops[0] = XEXP (addr, 0);
20712 ops[1] = reg;
20713 break;
20714
20715 case POST_MODIFY:
20716 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20717 gcc_unreachable ();
20718
20719 case REG:
20720 /* We have to use vldm / vstm for too-large modes. */
20721 if (nregs > 1)
20722 {
20723 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20724 templ = "v%smia%%?\t%%m0, %%h1";
20725 else
20726 templ = "v%s1.64\t%%h1, %%A0";
20727
20728 ops[0] = mem;
20729 ops[1] = reg;
20730 break;
20731 }
20732 /* Fall through. */
20733 case PLUS:
20734 if (GET_CODE (addr) == PLUS)
20735 addr = XEXP (addr, 0);
20736 /* Fall through. */
20737 case LABEL_REF:
20738 {
20739 int i;
20740 int overlap = -1;
20741 for (i = 0; i < nregs; i++)
20742 {
20743 /* We're only using DImode here because it's a convenient
20744 size. */
20745 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20746 ops[1] = adjust_address (mem, DImode, 8 * i);
20747 if (reg_overlap_mentioned_p (ops[0], mem))
20748 {
20749 gcc_assert (overlap == -1);
20750 overlap = i;
20751 }
20752 else
20753 {
20754 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20755 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20756 else
20757 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20758 output_asm_insn (buff, ops);
20759 }
20760 }
20761 if (overlap != -1)
20762 {
20763 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20764 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20765 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20766 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20767 else
20768 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20769 output_asm_insn (buff, ops);
20770 }
20771
20772 return "";
20773 }
20774
20775 default:
20776 gcc_unreachable ();
20777 }
20778
20779 sprintf (buff, templ, load ? "ld" : "st");
20780 output_asm_insn (buff, ops);
20781
20782 return "";
20783 }
20784
20785 /* Compute and return the length of neon_mov<mode>, where <mode> is
20786 one of VSTRUCT modes: EI, OI, CI or XI. */
20787 int
20788 arm_attr_length_move_neon (rtx_insn *insn)
20789 {
20790 rtx reg, mem, addr;
20791 int load;
20792 machine_mode mode;
20793
20794 extract_insn_cached (insn);
20795
20796 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20797 {
20798 mode = GET_MODE (recog_data.operand[0]);
20799 switch (mode)
20800 {
20801 case E_EImode:
20802 case E_OImode:
20803 return 8;
20804 case E_CImode:
20805 return 12;
20806 case E_XImode:
20807 return 16;
20808 default:
20809 gcc_unreachable ();
20810 }
20811 }
20812
20813 load = REG_P (recog_data.operand[0]);
20814 reg = recog_data.operand[!load];
20815 mem = recog_data.operand[load];
20816
20817 gcc_assert (MEM_P (mem));
20818
20819 addr = XEXP (mem, 0);
20820
20821 /* Strip off const from addresses like (const (plus (...))). */
20822 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20823 addr = XEXP (addr, 0);
20824
20825 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20826 {
20827 int insns = REG_NREGS (reg) / 2;
20828 return insns * 4;
20829 }
20830 else
20831 return 4;
20832 }
20833
20834 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20835 return zero. */
20836
20837 int
20838 arm_address_offset_is_imm (rtx_insn *insn)
20839 {
20840 rtx mem, addr;
20841
20842 extract_insn_cached (insn);
20843
20844 if (REG_P (recog_data.operand[0]))
20845 return 0;
20846
20847 mem = recog_data.operand[0];
20848
20849 gcc_assert (MEM_P (mem));
20850
20851 addr = XEXP (mem, 0);
20852
20853 if (REG_P (addr)
20854 || (GET_CODE (addr) == PLUS
20855 && REG_P (XEXP (addr, 0))
20856 && CONST_INT_P (XEXP (addr, 1))))
20857 return 1;
20858 else
20859 return 0;
20860 }
20861
20862 /* Output an ADD r, s, #n where n may be too big for one instruction.
20863 If adding zero to one register, output nothing. */
20864 const char *
20865 output_add_immediate (rtx *operands)
20866 {
20867 HOST_WIDE_INT n = INTVAL (operands[2]);
20868
20869 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20870 {
20871 if (n < 0)
20872 output_multi_immediate (operands,
20873 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20874 -n);
20875 else
20876 output_multi_immediate (operands,
20877 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20878 n);
20879 }
20880
20881 return "";
20882 }
20883
20884 /* Output a multiple immediate operation.
20885 OPERANDS is the vector of operands referred to in the output patterns.
20886 INSTR1 is the output pattern to use for the first constant.
20887 INSTR2 is the output pattern to use for subsequent constants.
20888 IMMED_OP is the index of the constant slot in OPERANDS.
20889 N is the constant value. */
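/* A worked example (register names chosen for illustration): splitting
   N == 257 for an add with operands r0 and r1 emits

	add	r0, r1, #1
	add	r0, r0, #256

   since each step handles one 8-bit chunk aligned on an even bit position,
   matching what an ARM data-processing immediate can encode.  */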
20890 static const char *
20891 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20892 int immed_op, HOST_WIDE_INT n)
20893 {
20894 #if HOST_BITS_PER_WIDE_INT > 32
20895 n &= 0xffffffff;
20896 #endif
20897
20898 if (n == 0)
20899 {
20900 /* Quick and easy output. */
20901 operands[immed_op] = const0_rtx;
20902 output_asm_insn (instr1, operands);
20903 }
20904 else
20905 {
20906 int i;
20907 const char * instr = instr1;
20908
20909 /* Note that n is never zero here (which would give no output). */
20910 for (i = 0; i < 32; i += 2)
20911 {
20912 if (n & (3 << i))
20913 {
20914 operands[immed_op] = GEN_INT (n & (255 << i));
20915 output_asm_insn (instr, operands);
20916 instr = instr2;
20917 i += 6;
20918 }
20919 }
20920 }
20921
20922 return "";
20923 }
20924
20925 /* Return the name of a shifter operation. */
20926 static const char *
20927 arm_shift_nmem(enum rtx_code code)
20928 {
20929 switch (code)
20930 {
20931 case ASHIFT:
20932 return ARM_LSL_NAME;
20933
20934 case ASHIFTRT:
20935 return "asr";
20936
20937 case LSHIFTRT:
20938 return "lsr";
20939
20940 case ROTATERT:
20941 return "ror";
20942
20943 default:
20944 abort();
20945 }
20946 }
20947
20948 /* Return the appropriate ARM instruction for the operation code.
20949 The returned result should not be overwritten. OP is the rtx of the
20950 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20951 was shifted. */
20952 const char *
20953 arithmetic_instr (rtx op, int shift_first_arg)
20954 {
20955 switch (GET_CODE (op))
20956 {
20957 case PLUS:
20958 return "add";
20959
20960 case MINUS:
20961 return shift_first_arg ? "rsb" : "sub";
20962
20963 case IOR:
20964 return "orr";
20965
20966 case XOR:
20967 return "eor";
20968
20969 case AND:
20970 return "and";
20971
20972 case ASHIFT:
20973 case ASHIFTRT:
20974 case LSHIFTRT:
20975 case ROTATERT:
20976 return arm_shift_nmem(GET_CODE(op));
20977
20978 default:
20979 gcc_unreachable ();
20980 }
20981 }
20982
20983 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20984 for the operation code. The returned result should not be overwritten.
20985 OP is the rtx code of the shift.
20986    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20987    constant shift amount otherwise.  */
20988 static const char *
20989 shift_op (rtx op, HOST_WIDE_INT *amountp)
20990 {
20991 const char * mnem;
20992 enum rtx_code code = GET_CODE (op);
20993
20994 switch (code)
20995 {
20996 case ROTATE:
20997 if (!CONST_INT_P (XEXP (op, 1)))
20998 {
20999 output_operand_lossage ("invalid shift operand");
21000 return NULL;
21001 }
21002
21003 code = ROTATERT;
21004 *amountp = 32 - INTVAL (XEXP (op, 1));
21005 mnem = "ror";
21006 break;
21007
21008 case ASHIFT:
21009 case ASHIFTRT:
21010 case LSHIFTRT:
21011 case ROTATERT:
21012 mnem = arm_shift_nmem(code);
21013 if (CONST_INT_P (XEXP (op, 1)))
21014 {
21015 *amountp = INTVAL (XEXP (op, 1));
21016 }
21017 else if (REG_P (XEXP (op, 1)))
21018 {
21019 *amountp = -1;
21020 return mnem;
21021 }
21022 else
21023 {
21024 output_operand_lossage ("invalid shift operand");
21025 return NULL;
21026 }
21027 break;
21028
21029 case MULT:
21030 /* We never have to worry about the amount being other than a
21031 power of 2, since this case can never be reloaded from a reg. */
21032 if (!CONST_INT_P (XEXP (op, 1)))
21033 {
21034 output_operand_lossage ("invalid shift operand");
21035 return NULL;
21036 }
21037
21038 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21039
21040 /* Amount must be a power of two. */
21041 if (*amountp & (*amountp - 1))
21042 {
21043 output_operand_lossage ("invalid shift operand");
21044 return NULL;
21045 }
21046
21047 *amountp = exact_log2 (*amountp);
21048 gcc_assert (IN_RANGE (*amountp, 0, 31));
21049 return ARM_LSL_NAME;
21050
21051 default:
21052 output_operand_lossage ("invalid shift operand");
21053 return NULL;
21054 }
21055
21056 /* This is not 100% correct, but follows from the desire to merge
21057 multiplication by a power of 2 with the recognizer for a
21058      shift.  >=32 is not a valid shift for "lsl", so we must try to
21059 output a shift that produces the correct arithmetical result.
21060 Using lsr #32 is identical except for the fact that the carry bit
21061 is not set correctly if we set the flags; but we never use the
21062 carry bit from such an operation, so we can ignore that. */
21063 if (code == ROTATERT)
21064 /* Rotate is just modulo 32. */
21065 *amountp &= 31;
21066 else if (*amountp != (*amountp & 31))
21067 {
21068 if (code == ASHIFT)
21069 mnem = "lsr";
21070 *amountp = 32;
21071 }
21072
21073 /* Shifts of 0 are no-ops. */
21074 if (*amountp == 0)
21075 return NULL;
21076
21077 return mnem;
21078 }
21079
21080 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21081 because /bin/as is horribly restrictive. The judgement about
21082 whether or not each character is 'printable' (and can be output as
21083 is) or not (and must be printed with an octal escape) must be made
21084 with reference to the *host* character set -- the situation is
21085 similar to that discussed in the comments above pp_c_char in
21086 c-pretty-print.cc. */
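/* For example, the four input bytes  a " b \n  come out as

	.ascii	"a\"b\012"

   printable characters are passed through (escaping '\' and '"') and
   anything else becomes a three-digit octal escape.  */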
21087
21088 #define MAX_ASCII_LEN 51
21089
21090 void
21091 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21092 {
21093 int i;
21094 int len_so_far = 0;
21095
21096 fputs ("\t.ascii\t\"", stream);
21097
21098 for (i = 0; i < len; i++)
21099 {
21100 int c = p[i];
21101
21102 if (len_so_far >= MAX_ASCII_LEN)
21103 {
21104 fputs ("\"\n\t.ascii\t\"", stream);
21105 len_so_far = 0;
21106 }
21107
21108 if (ISPRINT (c))
21109 {
21110 if (c == '\\' || c == '\"')
21111 {
21112 putc ('\\', stream);
21113 len_so_far++;
21114 }
21115 putc (c, stream);
21116 len_so_far++;
21117 }
21118 else
21119 {
21120 fprintf (stream, "\\%03o", c);
21121 len_so_far += 4;
21122 }
21123 }
21124
21125 fputs ("\"\n", stream);
21126 }
21127 \f
21128
21129 /* Compute the register save mask for registers 0 through 12
21130 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21131
21132 static unsigned long
21133 arm_compute_save_reg0_reg12_mask (void)
21134 {
21135 unsigned long func_type = arm_current_func_type ();
21136 unsigned long save_reg_mask = 0;
21137 unsigned int reg;
21138
21139 if (IS_INTERRUPT (func_type))
21140 {
21141 unsigned int max_reg;
21142 /* Interrupt functions must not corrupt any registers,
21143 even call clobbered ones. If this is a leaf function
21144 we can just examine the registers used by the RTL, but
21145 otherwise we have to assume that whatever function is
21146 called might clobber anything, and so we have to save
21147 all the call-clobbered registers as well. */
21148 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21149 /* FIQ handlers have registers r8 - r12 banked, so
21150 	 we only need to check r0 - r7; normal ISRs only
21151 bank r14 and r15, so we must check up to r12.
21152 r13 is the stack pointer which is always preserved,
21153 so we do not need to consider it here. */
21154 max_reg = 7;
21155 else
21156 max_reg = 12;
21157
21158 for (reg = 0; reg <= max_reg; reg++)
21159 if (reg_needs_saving_p (reg))
21160 save_reg_mask |= (1 << reg);
21161
21162 /* Also save the pic base register if necessary. */
21163 if (PIC_REGISTER_MAY_NEED_SAVING
21164 && crtl->uses_pic_offset_table)
21165 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21166 }
21167 else if (IS_VOLATILE(func_type))
21168 {
21169 /* For noreturn functions we historically omitted register saves
21170 altogether. However this really messes up debugging. As a
21171 	 compromise, save just the frame pointers.  Combined with the link
21172 register saved elsewhere this should be sufficient to get
21173 a backtrace. */
21174 if (frame_pointer_needed)
21175 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21176 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21177 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21178 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21179 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21180 }
21181 else
21182 {
21183 /* In the normal case we only need to save those registers
21184 which are call saved and which are used by this function. */
21185 for (reg = 0; reg <= 11; reg++)
21186 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21187 save_reg_mask |= (1 << reg);
21188
21189 /* Handle the frame pointer as a special case. */
21190 if (frame_pointer_needed)
21191 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21192
21193 /* If we aren't loading the PIC register,
21194 don't stack it even though it may be live. */
21195 if (PIC_REGISTER_MAY_NEED_SAVING
21196 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21197 || crtl->uses_pic_offset_table))
21198 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21199
21200 /* The prologue will copy SP into R0, so save it. */
21201 if (IS_STACKALIGN (func_type))
21202 save_reg_mask |= 1;
21203 }
21204
21205 /* Save registers so the exception handler can modify them. */
21206 if (crtl->calls_eh_return)
21207 {
21208 unsigned int i;
21209
21210 for (i = 0; ; i++)
21211 {
21212 reg = EH_RETURN_DATA_REGNO (i);
21213 if (reg == INVALID_REGNUM)
21214 break;
21215 save_reg_mask |= 1 << reg;
21216 }
21217 }
21218
21219 return save_reg_mask;
21220 }
21221
21222 /* Return true if r3 is live at the start of the function. */
21223
21224 static bool
21225 arm_r3_live_at_start_p (void)
21226 {
21227 /* Just look at cfg info, which is still close enough to correct at this
21228 point. This gives false positives for broken functions that might use
21229 uninitialized data that happens to be allocated in r3, but who cares? */
21230 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21231 }
21232
21233 /* Compute the number of bytes used to store the static chain register on the
21234 stack, above the stack frame. We need to know this accurately to get the
21235 alignment of the rest of the stack frame correct. */
21236
21237 static int
21238 arm_compute_static_chain_stack_bytes (void)
21239 {
21240 /* Once the value is updated from the init value of -1, do not
21241 re-compute. */
21242 if (cfun->machine->static_chain_stack_bytes != -1)
21243 return cfun->machine->static_chain_stack_bytes;
21244
21245 /* See the defining assertion in arm_expand_prologue. */
21246 if (IS_NESTED (arm_current_func_type ())
21247 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21248 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21249 || flag_stack_clash_protection)
21250 && !df_regs_ever_live_p (LR_REGNUM)))
21251 && arm_r3_live_at_start_p ()
21252 && crtl->args.pretend_args_size == 0)
21253 return 4;
21254
21255 return 0;
21256 }
21257
21258 /* Compute a bit mask of which core registers need to be
21259 saved on the stack for the current function.
21260 This is used by arm_compute_frame_layout, which may add extra registers. */
21261
21262 static unsigned long
21263 arm_compute_save_core_reg_mask (void)
21264 {
21265 unsigned int save_reg_mask = 0;
21266 unsigned long func_type = arm_current_func_type ();
21267 unsigned int reg;
21268
21269 if (IS_NAKED (func_type))
21270 /* This should never really happen. */
21271 return 0;
21272
21273 /* If we are creating a stack frame, then we must save the frame pointer,
21274 IP (which will hold the old stack pointer), LR and the PC. */
21275 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21276 save_reg_mask |=
21277 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21278 | (1 << IP_REGNUM)
21279 | (1 << LR_REGNUM)
21280 | (1 << PC_REGNUM);
21281
21282 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21283
21284 if (arm_current_function_pac_enabled_p ())
21285 save_reg_mask |= 1 << IP_REGNUM;
21286
21287 /* Decide if we need to save the link register.
21288 Interrupt routines have their own banked link register,
21289 so they never need to save it.
21290 Otherwise if we do not use the link register we do not need to save
21291 it. If we are pushing other registers onto the stack however, we
21292 can save an instruction in the epilogue by pushing the link register
21293 now and then popping it back into the PC. This incurs extra memory
21294 accesses though, so we only do it when optimizing for size, and only
21295 if we know that we will not need a fancy return sequence. */
21296 if (df_regs_ever_live_p (LR_REGNUM)
21297 || (save_reg_mask
21298 && optimize_size
21299 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21300 && !crtl->tail_call_emit
21301 && !crtl->calls_eh_return))
21302 save_reg_mask |= 1 << LR_REGNUM;
21303
21304 if (cfun->machine->lr_save_eliminated)
21305 save_reg_mask &= ~ (1 << LR_REGNUM);
21306
21307 if (TARGET_REALLY_IWMMXT
21308 && ((bit_count (save_reg_mask)
21309 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21310 arm_compute_static_chain_stack_bytes())
21311 ) % 2) != 0)
21312 {
21313 /* The total number of registers that are going to be pushed
21314 onto the stack is odd. We need to ensure that the stack
21315 is 64-bit aligned before we start to save iWMMXt registers,
21316 and also before we start to create locals. (A local variable
21317 might be a double or long long which we will load/store using
21318 an iWMMXt instruction). Therefore we need to push another
21319 ARM register, so that the stack will be 64-bit aligned. We
21320 try to avoid using the arg registers (r0 -r3) as they might be
21321 used to pass values in a tail call. */
21322 for (reg = 4; reg <= 12; reg++)
21323 if ((save_reg_mask & (1 << reg)) == 0)
21324 break;
21325
21326 if (reg <= 12)
21327 save_reg_mask |= (1 << reg);
21328 else
21329 {
21330 cfun->machine->sibcall_blocked = 1;
21331 save_reg_mask |= (1 << 3);
21332 }
21333 }
21334
21335 /* We may need to push an additional register for use initializing the
21336 PIC base register. */
21337 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21338 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21339 {
21340 reg = thumb_find_work_register (1 << 4);
21341 if (!call_used_or_fixed_reg_p (reg))
21342 save_reg_mask |= (1 << reg);
21343 }
21344
21345 return save_reg_mask;
21346 }
21347
21348 /* Compute a bit mask of which core registers need to be
21349 saved on the stack for the current function. */
21350 static unsigned long
21351 thumb1_compute_save_core_reg_mask (void)
21352 {
21353 unsigned long mask;
21354 unsigned reg;
21355
21356 mask = 0;
21357 for (reg = 0; reg < 12; reg ++)
21358 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21359 mask |= 1 << reg;
21360
21361 /* Handle the frame pointer as a special case. */
21362 if (frame_pointer_needed)
21363 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21364
21365 if (flag_pic
21366 && !TARGET_SINGLE_PIC_BASE
21367 && arm_pic_register != INVALID_REGNUM
21368 && crtl->uses_pic_offset_table)
21369 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21370
21371 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21372 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21373 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21374
21375 /* LR will also be pushed if any lo regs are pushed. */
21376 if (mask & 0xff || thumb_force_lr_save ())
21377 mask |= (1 << LR_REGNUM);
21378
21379 bool call_clobbered_scratch
21380 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21381 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21382
21383 /* Make sure we have a low work register if we need one. We will
21384 need one if we are going to push a high register, but we are not
21385 currently intending to push a low register. However if both the
21386 prologue and epilogue have a spare call-clobbered low register,
21387 then we won't need to find an additional work register. It does
21388 not need to be the same register in the prologue and
21389 epilogue. */
21390 if ((mask & 0xff) == 0
21391 && !call_clobbered_scratch
21392 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21393 {
21394 /* Use thumb_find_work_register to choose which register
21395 we will use. If the register is live then we will
21396 have to push it. Use LAST_LO_REGNUM as our fallback
21397 choice for the register to select. */
21398 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21399 /* Make sure the register returned by thumb_find_work_register is
21400 not part of the return value. */
21401 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21402 reg = LAST_LO_REGNUM;
21403
21404 if (callee_saved_reg_p (reg))
21405 mask |= 1 << reg;
21406 }
21407
21408 /* The 504 below is 8 bytes less than 512 because there are two possible
21409 alignment words. We can't tell here if they will be present or not so we
21410 have to play it safe and assume that they are. */
21411 if ((CALLER_INTERWORKING_SLOT_SIZE +
21412 ROUND_UP_WORD (get_frame_size ()) +
21413 crtl->outgoing_args_size) >= 504)
21414 {
21415 /* This is the same as the code in thumb1_expand_prologue() which
21416 determines which register to use for stack decrement. */
21417 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21418 if (mask & (1 << reg))
21419 break;
21420
21421 if (reg > LAST_LO_REGNUM)
21422 {
21423 /* Make sure we have a register available for stack decrement. */
21424 mask |= 1 << LAST_LO_REGNUM;
21425 }
21426 }
21427
21428 return mask;
21429 }
21430
21431 /* Return the number of bytes required to save VFP registers. */
21432 static int
21433 arm_get_vfp_saved_size (void)
21434 {
21435 unsigned int regno;
21436 int count;
21437 int saved;
21438
21439 saved = 0;
21440 /* Space for saved VFP registers. */
21441 if (TARGET_VFP_BASE)
21442 {
21443 count = 0;
21444 for (regno = FIRST_VFP_REGNUM;
21445 regno < LAST_VFP_REGNUM;
21446 regno += 2)
21447 {
21448 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21449 {
21450 if (count > 0)
21451 {
21452 /* Workaround ARM10 VFPr1 bug. */
21453 if (count == 2 && !arm_arch6)
21454 count++;
21455 saved += count * 8;
21456 }
21457 count = 0;
21458 }
21459 else
21460 count++;
21461 }
21462 if (count > 0)
21463 {
21464 if (count == 2 && !arm_arch6)
21465 count++;
21466 saved += count * 8;
21467 }
21468 }
21469 return saved;
21470 }
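
/* For example, if only d8 and d9 need saving on a core without arm_arch6,
   the run of two D registers is padded to three by the ARM10 VFPr1
   workaround above, so 24 bytes are reserved rather than 16; a run of
   four D registers is counted as-is and takes 32 bytes. */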
21471
21472
21473 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21474 everything bar the final return instruction. If SIMPLE_RETURN is true,
21475 then do not output the epilogue, because it has already been emitted in RTL.
21476
21477 Note: do not forget to update length attribute of corresponding insn pattern
21478 when changing assembly output (eg. length attribute of
21479 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21480 register clearing sequences). */
21481 const char *
21482 output_return_instruction (rtx operand, bool really_return, bool reverse,
21483 bool simple_return)
21484 {
21485 char conditional[10];
21486 char instr[100];
21487 unsigned reg;
21488 unsigned long live_regs_mask;
21489 unsigned long func_type;
21490 arm_stack_offsets *offsets;
21491
21492 func_type = arm_current_func_type ();
21493
21494 if (IS_NAKED (func_type))
21495 return "";
21496
21497 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21498 {
21499 /* If this function was declared non-returning, and we have
21500 found a tail call, then we have to trust that the called
21501 function won't return. */
21502 if (really_return)
21503 {
21504 rtx ops[2];
21505
21506 /* Otherwise, trap an attempted return by aborting. */
21507 ops[0] = operand;
21508 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21509 : "abort");
21510 assemble_external_libcall (ops[1]);
21511 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21512 }
21513
21514 return "";
21515 }
21516
21517 gcc_assert (!cfun->calls_alloca || really_return);
21518
21519 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21520
21521 cfun->machine->return_used_this_function = 1;
21522
21523 offsets = arm_get_frame_offsets ();
21524 live_regs_mask = offsets->saved_regs_mask;
21525
21526 if (!simple_return && live_regs_mask)
21527 {
21528 const char * return_reg;
21529
21530 /* If we do not have any special requirements for function exit
21531 (e.g. interworking) then we can load the return address
21532 directly into the PC. Otherwise we must load it into LR. */
21533 if (really_return
21534 && !IS_CMSE_ENTRY (func_type)
21535 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21536 return_reg = reg_names[PC_REGNUM];
21537 else
21538 return_reg = reg_names[LR_REGNUM];
21539
21540 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21541 {
21542 /* There are three possible reasons for the IP register
21543 being saved: 1) a stack frame was created, in which case
21544 IP contains the old stack pointer; 2) an ISR routine
21545 corrupted it; or 3) it was saved to align the stack on
21546 iWMMXt. In case 1, restore IP into SP; otherwise just
21547 restore IP. */
21548 if (frame_pointer_needed)
21549 {
21550 live_regs_mask &= ~ (1 << IP_REGNUM);
21551 live_regs_mask |= (1 << SP_REGNUM);
21552 }
21553 else
21554 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21555 }
21556
21557 /* On some ARM architectures it is faster to use LDR rather than
21558 LDM to load a single register. On other architectures, the
21559 cost is the same. In 26 bit mode, or for exception handlers,
21560 we have to use LDM to load the PC so that the CPSR is also
21561 restored. */
21562 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21563 if (live_regs_mask == (1U << reg))
21564 break;
21565
21566 if (reg <= LAST_ARM_REGNUM
21567 && (reg != LR_REGNUM
21568 || ! really_return
21569 || ! IS_INTERRUPT (func_type)))
21570 {
21571 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21572 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21573 }
21574 else
21575 {
21576 char *p;
21577 int first = 1;
21578
21579 /* Generate the load multiple instruction to restore the
21580 registers. Note we can get here, even if
21581 frame_pointer_needed is true, but only if sp already
21582 points to the base of the saved core registers. */
21583 if (live_regs_mask & (1 << SP_REGNUM))
21584 {
21585 unsigned HOST_WIDE_INT stack_adjust;
21586
21587 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21588 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21589
21590 if (stack_adjust && arm_arch5t && TARGET_ARM)
21591 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21592 else
21593 {
21594 /* If we can't use ldmib (SA110 bug),
21595 then try to pop r3 instead. */
21596 if (stack_adjust)
21597 live_regs_mask |= 1 << 3;
21598
21599 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21600 }
21601 }
21602 /* For interrupt returns we have to use an LDM rather than
21603 a POP so that we can use the exception return variant. */
21604 else if (IS_INTERRUPT (func_type))
21605 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21606 else
21607 sprintf (instr, "pop%s\t{", conditional);
21608
21609 p = instr + strlen (instr);
21610
21611 for (reg = 0; reg <= SP_REGNUM; reg++)
21612 if (live_regs_mask & (1 << reg))
21613 {
21614 int l = strlen (reg_names[reg]);
21615
21616 if (first)
21617 first = 0;
21618 else
21619 {
21620 memcpy (p, ", ", 2);
21621 p += 2;
21622 }
21623
21624 memcpy (p, "%|", 2);
21625 memcpy (p + 2, reg_names[reg], l);
21626 p += l + 2;
21627 }
21628
21629 if (live_regs_mask & (1 << LR_REGNUM))
21630 {
21631 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21632 /* If returning from an interrupt, restore the CPSR. */
21633 if (IS_INTERRUPT (func_type))
21634 strcat (p, "^");
21635 }
21636 else
21637 strcpy (p, "}");
21638 }
21639
21640 output_asm_insn (instr, & operand);
21641
21642 /* See if we need to generate an extra instruction to
21643 perform the actual function return. */
21644 if (really_return
21645 && func_type != ARM_FT_INTERWORKED
21646 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21647 {
21648 /* The return has already been handled
21649 by loading the LR into the PC. */
21650 return "";
21651 }
21652 }
21653
21654 if (really_return)
21655 {
21656 switch ((int) ARM_FUNC_TYPE (func_type))
21657 {
21658 case ARM_FT_ISR:
21659 case ARM_FT_FIQ:
21660 /* ??? This is wrong for unified assembly syntax. */
21661 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21662 break;
21663
21664 case ARM_FT_INTERWORKED:
21665 gcc_assert (arm_arch5t || arm_arch4t);
21666 sprintf (instr, "bx%s\t%%|lr", conditional);
21667 break;
21668
21669 case ARM_FT_EXCEPTION:
21670 /* ??? This is wrong for unified assembly syntax. */
21671 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21672 break;
21673
21674 default:
21675 if (IS_CMSE_ENTRY (func_type))
21676 {
21677 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21678 emitted by cmse_nonsecure_entry_clear_before_return () and the
21679 VSTR/VLDR instructions in the prologue and epilogue. */
21680 if (!TARGET_HAVE_FPCXT_CMSE)
21681 {
21682 /* Check if we have to clear the 'GE bits', which are only used if
21683 parallel add and subtraction instructions are available. */
21684 if (TARGET_INT_SIMD)
21685 snprintf (instr, sizeof (instr),
21686 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21687 else
21688 snprintf (instr, sizeof (instr),
21689 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21690
21691 output_asm_insn (instr, & operand);
21692 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21693 care of it. */
21694 if (TARGET_HARD_FLOAT)
21695 {
21696 /* Clear the cumulative exception-status bits (0-4,7) and
21697 the condition code bits (28-31) of the FPSCR. We need
21698 to remember to clear the first scratch register used
21699 (IP) and save and restore the second (r4).
21700
21701 Important note: the length of the
21702 thumb2_cmse_entry_return insn pattern must account for
21703 the size of the below instructions. */
21704 output_asm_insn ("push\t{%|r4}", & operand);
21705 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21706 output_asm_insn ("movw\t%|r4, #65376", & operand);
21707 output_asm_insn ("movt\t%|r4, #4095", & operand);
21708 output_asm_insn ("and\t%|ip, %|r4", & operand);
21709 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21710 output_asm_insn ("pop\t{%|r4}", & operand);
21711 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21712 }
21713 }
21714 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21715 }
21716 /* Use bx if it's available. */
21717 else if (arm_arch5t || arm_arch4t)
21718 sprintf (instr, "bx%s\t%%|lr", conditional);
21719 else
21720 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21721 break;
21722 }
21723
21724 output_asm_insn (instr, & operand);
21725 }
21726
21727 return "";
21728 }
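
/* As a rough illustration of the sequences built above (the exact forms
   depend on the condition, the function type and the live register mask):

     normal function, v4T/v5T or later:  bx    lr
     normal function, earlier cores:     mov   pc, lr
     interworked return:                 bx    lr
     ISR or FIQ handler:                 subs  pc, lr, #4
     exception handler:                  movs  pc, lr
     CMSE entry function:                bxns  lr  (after the APSR/FPSCR
                                                    clearing shown above)

   When saved registers are popped and LR is in the mask, the return is
   instead folded into the load, e.g. "pop {r4, r5, pc}" or, for interrupt
   returns, "ldmfd sp!, {r4, r5, pc}^". */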
21729
21730 /* Output in FILE asm statements needed to declare the NAME of the function
21731 defined by its DECL node. */
21732
21733 void
21734 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21735 {
21736 size_t cmse_name_len;
21737 char *cmse_name = 0;
21738 char cmse_prefix[] = "__acle_se_";
21739
21740 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21741 extra function label for each function with the 'cmse_nonsecure_entry'
21742 attribute. This extra function label should be prepended with
21743 '__acle_se_', telling the linker that it needs to create secure gateway
21744 veneers for this function. */
21745 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21746 DECL_ATTRIBUTES (decl)))
21747 {
21748 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21749 cmse_name = XALLOCAVEC (char, cmse_name_len);
21750 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21751 targetm.asm_out.globalize_label (file, cmse_name);
21752
21753 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21754 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21755 }
21756
21757 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21758 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21759 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21760 ASM_OUTPUT_LABEL (file, name);
21761
21762 if (cmse_name)
21763 ASM_OUTPUT_LABEL (file, cmse_name);
21764
21765 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21766 }
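
/* For a hypothetical function "foo" carrying the cmse_nonsecure_entry
   attribute, the output produced above looks roughly like this (the exact
   directive spellings depend on the target's assembler macros):

	.global	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
   foo:
   __acle_se_foo:

   Both labels name the same address, which lets the linker create the
   secure gateway veneer. */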
21767
21768 /* Write the function name into the code section, directly preceding
21769 the function prologue.
21770
21771 Code will be output similar to this:
21772 t0
21773 .ascii "arm_poke_function_name", 0
21774 .align
21775 t1
21776 .word 0xff000000 + (t1 - t0)
21777 arm_poke_function_name
21778 mov ip, sp
21779 stmfd sp!, {fp, ip, lr, pc}
21780 sub fp, ip, #4
21781
21782 When performing a stack backtrace, code can inspect the value
21783 of 'pc' stored at 'fp' + 0. If the trace function then looks
21784 at location pc - 12 and the top 8 bits are set, then we know
21785 that there is a function name embedded immediately preceding this
21786 location, whose length is ((pc[-3]) & ~0xff000000).
21787
21788 We assume that pc is declared as a pointer to an unsigned long.
21789
21790 It is of no benefit to output the function name if we are assembling
21791 a leaf function. These function types will not contain a stack
21792 backtrace structure, therefore it is not possible to determine the
21793 function name. */
21794 void
21795 arm_poke_function_name (FILE *stream, const char *name)
21796 {
21797 unsigned long alignlength;
21798 unsigned long length;
21799 rtx x;
21800
21801 length = strlen (name) + 1;
21802 alignlength = ROUND_UP_WORD (length);
21803
21804 ASM_OUTPUT_ASCII (stream, name, length);
21805 ASM_OUTPUT_ALIGN (stream, 2);
21806 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21807 assemble_aligned_integer (UNITS_PER_WORD, x);
21808 }
21809
21810 /* Place some comments into the assembler stream
21811 describing the current function. */
21812 static void
21813 arm_output_function_prologue (FILE *f)
21814 {
21815 unsigned long func_type;
21816
21817 /* Sanity check. */
21818 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21819
21820 func_type = arm_current_func_type ();
21821
21822 switch ((int) ARM_FUNC_TYPE (func_type))
21823 {
21824 default:
21825 case ARM_FT_NORMAL:
21826 break;
21827 case ARM_FT_INTERWORKED:
21828 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21829 break;
21830 case ARM_FT_ISR:
21831 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21832 break;
21833 case ARM_FT_FIQ:
21834 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21835 break;
21836 case ARM_FT_EXCEPTION:
21837 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21838 break;
21839 }
21840
21841 if (IS_NAKED (func_type))
21842 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21843
21844 if (IS_VOLATILE (func_type))
21845 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21846
21847 if (IS_NESTED (func_type))
21848 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21849 if (IS_STACKALIGN (func_type))
21850 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21851 if (IS_CMSE_ENTRY (func_type))
21852 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21853
21854 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21855 (HOST_WIDE_INT) crtl->args.size,
21856 crtl->args.pretend_args_size,
21857 (HOST_WIDE_INT) get_frame_size ());
21858
21859 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21860 frame_pointer_needed,
21861 cfun->machine->uses_anonymous_args);
21862
21863 if (cfun->machine->lr_save_eliminated)
21864 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21865
21866 if (crtl->calls_eh_return)
21867 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21868
21869 }
21870
21871 static void
21872 arm_output_function_epilogue (FILE *)
21873 {
21874 arm_stack_offsets *offsets;
21875
21876 if (TARGET_THUMB1)
21877 {
21878 int regno;
21879
21880 /* Emit any call-via-reg trampolines that are needed for v4t support
21881 of call_reg and call_value_reg type insns. */
21882 for (regno = 0; regno < LR_REGNUM; regno++)
21883 {
21884 rtx label = cfun->machine->call_via[regno];
21885
21886 if (label != NULL)
21887 {
21888 switch_to_section (function_section (current_function_decl));
21889 targetm.asm_out.internal_label (asm_out_file, "L",
21890 CODE_LABEL_NUMBER (label));
21891 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21892 }
21893 }
21894
21895 /* ??? Probably not safe to set this here, since it assumes that a
21896 function will be emitted as assembly immediately after we generate
21897 RTL for it. This does not happen for inline functions. */
21898 cfun->machine->return_used_this_function = 0;
21899 }
21900 else /* TARGET_32BIT */
21901 {
21902 /* We need to take into account any stack-frame rounding. */
21903 offsets = arm_get_frame_offsets ();
21904
21905 gcc_assert (!use_return_insn (FALSE, NULL)
21906 || (cfun->machine->return_used_this_function != 0)
21907 || offsets->saved_regs == offsets->outgoing_args
21908 || frame_pointer_needed);
21909 }
21910 }
21911
21912 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21913 STR and STRD. If an even number of registers is being pushed, an
21914 STRD pattern is created for each register pair. If an odd number
21915 of registers is pushed, emit an initial STR followed by as many
21916 STRD instructions as are needed. This works best when the stack
21917 is initially 64-bit aligned (the normal case), since it ensures
21918 that each STRD is also 64-bit aligned. */
21919 static void
21920 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21921 {
21922 int num_regs = 0;
21923 int i;
21924 int regno;
21925 rtx par = NULL_RTX;
21926 rtx dwarf = NULL_RTX;
21927 rtx tmp;
21928 bool first = true;
21929
21930 num_regs = bit_count (saved_regs_mask);
21931
21932 /* Must be at least one register to save, and can't save SP or PC. */
21933 gcc_assert (num_regs > 0 && num_regs <= 14);
21934 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21935 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21936
21937 /* Create sequence for DWARF info. All the frame-related data for
21938 debugging is held in this wrapper. */
21939 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21940
21941 /* Describe the stack adjustment. */
21942 tmp = gen_rtx_SET (stack_pointer_rtx,
21943 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21944 RTX_FRAME_RELATED_P (tmp) = 1;
21945 XVECEXP (dwarf, 0, 0) = tmp;
21946
21947 /* Find the first register. */
21948 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21949 ;
21950
21951 i = 0;
21952
21953 /* If there's an odd number of registers to push, start off by
21954 pushing a single register. This ensures that subsequent strd
21955 operations are dword aligned (assuming that SP was originally
21956 64-bit aligned). */
21957 if ((num_regs & 1) != 0)
21958 {
21959 rtx reg, mem, insn;
21960
21961 reg = gen_rtx_REG (SImode, regno);
21962 if (num_regs == 1)
21963 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21964 stack_pointer_rtx));
21965 else
21966 mem = gen_frame_mem (Pmode,
21967 gen_rtx_PRE_MODIFY
21968 (Pmode, stack_pointer_rtx,
21969 plus_constant (Pmode, stack_pointer_rtx,
21970 -4 * num_regs)));
21971
21972 tmp = gen_rtx_SET (mem, reg);
21973 RTX_FRAME_RELATED_P (tmp) = 1;
21974 insn = emit_insn (tmp);
21975 RTX_FRAME_RELATED_P (insn) = 1;
21976 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21977 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21978 RTX_FRAME_RELATED_P (tmp) = 1;
21979 i++;
21980 regno++;
21981 XVECEXP (dwarf, 0, i) = tmp;
21982 first = false;
21983 }
21984
21985 while (i < num_regs)
21986 if (saved_regs_mask & (1 << regno))
21987 {
21988 rtx reg1, reg2, mem1, mem2;
21989 rtx tmp0, tmp1, tmp2;
21990 int regno2;
21991
21992 /* Find the register to pair with this one. */
21993 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21994 regno2++)
21995 ;
21996
21997 reg1 = gen_rtx_REG (SImode, regno);
21998 reg2 = gen_rtx_REG (SImode, regno2);
21999
22000 if (first)
22001 {
22002 rtx insn;
22003
22004 first = false;
22005 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22006 stack_pointer_rtx,
22007 -4 * num_regs));
22008 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22009 stack_pointer_rtx,
22010 -4 * (num_regs - 1)));
22011 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22012 plus_constant (Pmode, stack_pointer_rtx,
22013 -4 * (num_regs)));
22014 tmp1 = gen_rtx_SET (mem1, reg1);
22015 tmp2 = gen_rtx_SET (mem2, reg2);
22016 RTX_FRAME_RELATED_P (tmp0) = 1;
22017 RTX_FRAME_RELATED_P (tmp1) = 1;
22018 RTX_FRAME_RELATED_P (tmp2) = 1;
22019 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22020 XVECEXP (par, 0, 0) = tmp0;
22021 XVECEXP (par, 0, 1) = tmp1;
22022 XVECEXP (par, 0, 2) = tmp2;
22023 insn = emit_insn (par);
22024 RTX_FRAME_RELATED_P (insn) = 1;
22025 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22026 }
22027 else
22028 {
22029 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22030 stack_pointer_rtx,
22031 4 * i));
22032 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22033 stack_pointer_rtx,
22034 4 * (i + 1)));
22035 tmp1 = gen_rtx_SET (mem1, reg1);
22036 tmp2 = gen_rtx_SET (mem2, reg2);
22037 RTX_FRAME_RELATED_P (tmp1) = 1;
22038 RTX_FRAME_RELATED_P (tmp2) = 1;
22039 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22040 XVECEXP (par, 0, 0) = tmp1;
22041 XVECEXP (par, 0, 1) = tmp2;
22042 emit_insn (par);
22043 }
22044
22045 /* Create unwind information. This is an approximation. */
22046 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22047 plus_constant (Pmode,
22048 stack_pointer_rtx,
22049 4 * i)),
22050 reg1);
22051 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22052 plus_constant (Pmode,
22053 stack_pointer_rtx,
22054 4 * (i + 1))),
22055 reg2);
22056
22057 RTX_FRAME_RELATED_P (tmp1) = 1;
22058 RTX_FRAME_RELATED_P (tmp2) = 1;
22059 XVECEXP (dwarf, 0, i + 1) = tmp1;
22060 XVECEXP (dwarf, 0, i + 2) = tmp2;
22061 i += 2;
22062 regno = regno2 + 1;
22063 }
22064 else
22065 regno++;
22066
22067 return;
22068 }
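
/* A sketch of the effect for a hypothetical mask of {r4, r5, r6} (three
   registers, so the odd-count path above is taken):

	str	r4, [sp, #-12]!		@ single store allocates all 12 bytes
	strd	r5, r6, [sp, #4]	@ remaining pair, kept dword aligned

   The exact instructions depend on how the emitted RTL is matched by the
   strd patterns. */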
22069
22070 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22071 whenever possible, otherwise it emits single-word stores. The first store
22072 also allocates stack space for all saved registers, using writeback with
22073 post-addressing mode. All other stores use offset addressing. If no STRD
22074 can be emitted, this function emits a sequence of single-word stores,
22075 and not an STM as before, because single-word stores provide more scheduling
22076 freedom and can be turned into an STM by peephole optimizations. */
22077 static void
22078 arm_emit_strd_push (unsigned long saved_regs_mask)
22079 {
22080 int num_regs = 0;
22081 int i, j, dwarf_index = 0;
22082 int offset = 0;
22083 rtx dwarf = NULL_RTX;
22084 rtx insn = NULL_RTX;
22085 rtx tmp, mem;
22086
22087 /* TODO: More efficient code can be emitted by changing the
22088 layout, e.g., first push all pairs that can use STRD to keep the
22089 stack aligned, and then push all other registers. */
22090 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22091 if (saved_regs_mask & (1 << i))
22092 num_regs++;
22093
22094 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22095 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22096 gcc_assert (num_regs > 0);
22097
22098 /* Create sequence for DWARF info. */
22099 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22100
22101 /* For dwarf info, we generate an explicit stack update. */
22102 tmp = gen_rtx_SET (stack_pointer_rtx,
22103 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22104 RTX_FRAME_RELATED_P (tmp) = 1;
22105 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22106
22107 /* Save registers. */
22108 offset = - 4 * num_regs;
22109 j = 0;
22110 while (j <= LAST_ARM_REGNUM)
22111 if (saved_regs_mask & (1 << j))
22112 {
22113 if ((j % 2 == 0)
22114 && (saved_regs_mask & (1 << (j + 1))))
22115 {
22116 /* Current register and next register form a register pair for
22117 which STRD can be generated. */
22118 if (offset < 0)
22119 {
22120 /* Allocate stack space for all saved registers. */
22121 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22122 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22123 mem = gen_frame_mem (DImode, tmp);
22124 offset = 0;
22125 }
22126 else if (offset > 0)
22127 mem = gen_frame_mem (DImode,
22128 plus_constant (Pmode,
22129 stack_pointer_rtx,
22130 offset));
22131 else
22132 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22133
22134 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22135 RTX_FRAME_RELATED_P (tmp) = 1;
22136 tmp = emit_insn (tmp);
22137
22138 /* Record the first store insn. */
22139 if (dwarf_index == 1)
22140 insn = tmp;
22141
22142 /* Generate dwarf info. */
22143 mem = gen_frame_mem (SImode,
22144 plus_constant (Pmode,
22145 stack_pointer_rtx,
22146 offset));
22147 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22148 RTX_FRAME_RELATED_P (tmp) = 1;
22149 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22150
22151 mem = gen_frame_mem (SImode,
22152 plus_constant (Pmode,
22153 stack_pointer_rtx,
22154 offset + 4));
22155 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22156 RTX_FRAME_RELATED_P (tmp) = 1;
22157 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22158
22159 offset += 8;
22160 j += 2;
22161 }
22162 else
22163 {
22164 /* Emit a single word store. */
22165 if (offset < 0)
22166 {
22167 /* Allocate stack space for all saved registers. */
22168 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22169 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22170 mem = gen_frame_mem (SImode, tmp);
22171 offset = 0;
22172 }
22173 else if (offset > 0)
22174 mem = gen_frame_mem (SImode,
22175 plus_constant (Pmode,
22176 stack_pointer_rtx,
22177 offset));
22178 else
22179 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22180
22181 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22182 RTX_FRAME_RELATED_P (tmp) = 1;
22183 tmp = emit_insn (tmp);
22184
22185 /* Record the first store insn. */
22186 if (dwarf_index == 1)
22187 insn = tmp;
22188
22189 /* Generate dwarf info. */
22190 mem = gen_frame_mem (SImode,
22191 plus_constant(Pmode,
22192 stack_pointer_rtx,
22193 offset));
22194 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22195 RTX_FRAME_RELATED_P (tmp) = 1;
22196 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22197
22198 offset += 4;
22199 j += 1;
22200 }
22201 }
22202 else
22203 j++;
22204
22205 /* Attach dwarf info to the first insn we generate. */
22206 gcc_assert (insn != NULL_RTX);
22207 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22208 RTX_FRAME_RELATED_P (insn) = 1;
22209 }
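
/* A sketch of the effect for a hypothetical mask of {r4, r5, r7}:

	strd	r4, r5, [sp, #-12]!	@ first store allocates all 12 bytes
	str	r7, [sp, #8]		@ r6 is not saved, so r7 gets a
					@ single-word store at offset 8

   As with the Thumb-2 variant, the final assembly depends on how the
   emitted RTL is matched. */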
22210
22211 /* Generate and emit an insn that we will recognize as a push_multi.
22212 Unfortunately, since this insn does not reflect very well the actual
22213 semantics of the operation, we need to annotate the insn for the benefit
22214 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22215 MASK for registers that should be annotated for DWARF2 frame unwind
22216 information. */
22217 static rtx
22218 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22219 {
22220 int num_regs = 0;
22221 int num_dwarf_regs = 0;
22222 int i, j;
22223 rtx par;
22224 rtx dwarf;
22225 int dwarf_par_index;
22226 rtx tmp, reg;
22227
22228 /* We don't record the PC in the dwarf frame information. */
22229 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22230
22231 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22232 {
22233 if (mask & (1 << i))
22234 num_regs++;
22235 if (dwarf_regs_mask & (1 << i))
22236 num_dwarf_regs++;
22237 }
22238
22239 gcc_assert (num_regs && num_regs <= 16);
22240 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22241
22242 /* For the body of the insn we are going to generate an UNSPEC in
22243 parallel with several USEs. This allows the insn to be recognized
22244 by the push_multi pattern in the arm.md file.
22245
22246 The body of the insn looks something like this:
22247
22248 (parallel [
22249 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22250 (const_int:SI <num>)))
22251 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22252 (use (reg:SI XX))
22253 (use (reg:SI YY))
22254 ...
22255 ])
22256
22257 For the frame note however, we try to be more explicit and actually
22258 show each register being stored into the stack frame, plus a (single)
22259 decrement of the stack pointer. We do it this way in order to be
22260 friendly to the stack unwinding code, which only wants to see a single
22261 stack decrement per instruction. The RTL we generate for the note looks
22262 something like this:
22263
22264 (sequence [
22265 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22266 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22267 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22268 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22269 ...
22270 ])
22271
22272 FIXME: In an ideal world the PRE_MODIFY would not exist and
22273 instead we'd have a parallel expression detailing all
22274 the stores to the various memory addresses so that debug
22275 information is more up-to-date. Remember however while writing
22276 this to take care of the constraints with the push instruction.
22277
22278 Note also that this has to be taken care of for the VFP registers.
22279
22280 For more see PR43399. */
22281
22282 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22283 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22284 dwarf_par_index = 1;
22285
22286 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22287 {
22288 if (mask & (1 << i))
22289 {
22290 /* NOTE: The dwarf code emitter handles reg-reg copies correctly and in the
22291 following example the reg-reg copy of SP to the IP register is handled
22292 through the .cfi_def_cfa_register directive and the .cfi_offset
22293 directive for the IP register is skipped by the dwarf code emitter.
22294 Example:
22295 mov ip, sp
22296 .cfi_def_cfa_register 12
22297 push {fp, ip, lr, pc}
22298 .cfi_offset 11, -16
22299 .cfi_offset 13, -12
22300 .cfi_offset 14, -8
22301
22302 Whereas the Arm-specific .save directive handling is different to that
22303 of the dwarf code emitter and doesn't consider reg-reg copies while
22304 updating the register list. When PACBTI is enabled, we manually
22305 update the .save directive register list to use "ra_auth_code"
22306 (pseudo register 143) instead of the IP register, as shown in the
22307 following pseudo code.
22308 Example:
22309 pacbti ip, lr, sp
22310 .cfi_register 143, 12
22311 push {r3, r7, ip, lr}
22312 .save {r3, r7, ra_auth_code, lr}
22313 */
22314 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22315 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22316 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22317
22318 XVECEXP (par, 0, 0)
22319 = gen_rtx_SET (gen_frame_mem
22320 (BLKmode,
22321 gen_rtx_PRE_MODIFY (Pmode,
22322 stack_pointer_rtx,
22323 plus_constant
22324 (Pmode, stack_pointer_rtx,
22325 -4 * num_regs))
22326 ),
22327 gen_rtx_UNSPEC (BLKmode,
22328 gen_rtvec (1, reg),
22329 UNSPEC_PUSH_MULT));
22330
22331 if (dwarf_regs_mask & (1 << i))
22332 {
22333 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22334 dwarf_reg);
22335 RTX_FRAME_RELATED_P (tmp) = 1;
22336 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22337 }
22338
22339 break;
22340 }
22341 }
22342
22343 for (j = 1, i++; j < num_regs; i++)
22344 {
22345 if (mask & (1 << i))
22346 {
22347 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22348 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22349 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22350
22351 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22352
22353 if (dwarf_regs_mask & (1 << i))
22354 {
22355 tmp
22356 = gen_rtx_SET (gen_frame_mem
22357 (SImode,
22358 plus_constant (Pmode, stack_pointer_rtx,
22359 4 * j)),
22360 dwarf_reg);
22361 RTX_FRAME_RELATED_P (tmp) = 1;
22362 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22363 }
22364
22365 j++;
22366 }
22367 }
22368
22369 par = emit_insn (par);
22370
22371 tmp = gen_rtx_SET (stack_pointer_rtx,
22372 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22373 RTX_FRAME_RELATED_P (tmp) = 1;
22374 XVECEXP (dwarf, 0, 0) = tmp;
22375
22376 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22377
22378 return par;
22379 }
22380
22381 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22382 SIZE is the offset to be adjusted.
22383 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22384 static void
22385 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22386 {
22387 rtx dwarf;
22388
22389 RTX_FRAME_RELATED_P (insn) = 1;
22390 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22391 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22392 }
22393
22394 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22395 SAVED_REGS_MASK shows which registers need to be restored.
22396
22397 Unfortunately, since this insn does not reflect very well the actual
22398 semantics of the operation, we need to annotate the insn for the benefit
22399 of DWARF2 frame unwind information. */
22400 static void
22401 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22402 {
22403 int num_regs = 0;
22404 int i, j;
22405 rtx par;
22406 rtx dwarf = NULL_RTX;
22407 rtx tmp, reg;
22408 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22409 int offset_adj;
22410 int emit_update;
22411
22412 offset_adj = return_in_pc ? 1 : 0;
22413 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22414 if (saved_regs_mask & (1 << i))
22415 num_regs++;
22416
22417 gcc_assert (num_regs && num_regs <= 16);
22418
22419 /* If SP is in the reglist, then we don't emit an SP update insn. */
22420 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22421
22422 /* The parallel needs to hold num_regs SETs
22423 and one SET for the stack update. */
22424 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22425
22426 if (return_in_pc)
22427 XVECEXP (par, 0, 0) = ret_rtx;
22428
22429 if (emit_update)
22430 {
22431 /* Increment the stack pointer, based on there being
22432 num_regs 4-byte registers to restore. */
22433 tmp = gen_rtx_SET (stack_pointer_rtx,
22434 plus_constant (Pmode,
22435 stack_pointer_rtx,
22436 4 * num_regs));
22437 RTX_FRAME_RELATED_P (tmp) = 1;
22438 XVECEXP (par, 0, offset_adj) = tmp;
22439 }
22440
22441 /* Now restore every reg, which may include PC. */
22442 for (j = 0, i = 0; j < num_regs; i++)
22443 if (saved_regs_mask & (1 << i))
22444 {
22445 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22446 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22447 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22448 if ((num_regs == 1) && emit_update && !return_in_pc)
22449 {
22450 /* Emit single load with writeback. */
22451 tmp = gen_frame_mem (SImode,
22452 gen_rtx_POST_INC (Pmode,
22453 stack_pointer_rtx));
22454 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22455 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22456 dwarf);
22457 return;
22458 }
22459
22460 tmp = gen_rtx_SET (reg,
22461 gen_frame_mem
22462 (SImode,
22463 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22464 RTX_FRAME_RELATED_P (tmp) = 1;
22465 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22466
22467 /* We need to maintain a sequence for DWARF info too. As dwarf info
22468 should not have PC, skip PC. */
22469 if (i != PC_REGNUM)
22470 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22471
22472 j++;
22473 }
22474
22475 if (return_in_pc)
22476 par = emit_jump_insn (par);
22477 else
22478 par = emit_insn (par);
22479
22480 REG_NOTES (par) = dwarf;
22481 if (!return_in_pc)
22482 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22483 stack_pointer_rtx, stack_pointer_rtx);
22484 }
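
/* A sketch of the parallel built above for a hypothetical mask of
   {r4, r5, pc}, so return_in_pc is true and an SP update is emitted:

	(parallel [(return)
		   (set (reg sp) (plus (reg sp) (const_int 12)))
		   (set (reg r4) (mem (reg sp)))
		   (set (reg r5) (mem (plus (reg sp) (const_int 4))))
		   (set (reg pc) (mem (plus (reg sp) (const_int 8))))])

   which the pop_multi pattern typically renders as "pop {r4, r5, pc}". */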
22485
22486 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22487 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22488
22489 Unfortunately, since this insn does not reflect very well the actual
22490 semantics of the operation, we need to annotate the insn for the benefit
22491 of DWARF2 frame unwind information. */
22492 static void
22493 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22494 {
22495 int i, j;
22496 rtx par;
22497 rtx dwarf = NULL_RTX;
22498 rtx tmp, reg;
22499
22500 gcc_assert (num_regs && num_regs <= 32);
22501
22502 /* Workaround ARM10 VFPr1 bug. */
22503 if (num_regs == 2 && !arm_arch6)
22504 {
22505 if (first_reg == 15)
22506 first_reg--;
22507
22508 num_regs++;
22509 }
22510
22511 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22512 there could be up to 32 D-registers to restore.
22513 If there are more than 16 D-registers, make two recursive calls,
22514 each of which emits one pop_multi instruction. */
22515 if (num_regs > 16)
22516 {
22517 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22518 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22519 return;
22520 }
22521
22522 /* The parallel needs to hold num_regs SETs
22523 and one SET for the stack update. */
22524 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22525
22526 /* Increment the stack pointer, based on there being
22527 num_regs 8-byte registers to restore. */
22528 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22529 RTX_FRAME_RELATED_P (tmp) = 1;
22530 XVECEXP (par, 0, 0) = tmp;
22531
22532 /* Now show every reg that will be restored, using a SET for each. */
22533 for (j = 0, i=first_reg; j < num_regs; i += 2)
22534 {
22535 reg = gen_rtx_REG (DFmode, i);
22536
22537 tmp = gen_rtx_SET (reg,
22538 gen_frame_mem
22539 (DFmode,
22540 plus_constant (Pmode, base_reg, 8 * j)));
22541 RTX_FRAME_RELATED_P (tmp) = 1;
22542 XVECEXP (par, 0, j + 1) = tmp;
22543
22544 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22545
22546 j++;
22547 }
22548
22549 par = emit_insn (par);
22550 REG_NOTES (par) = dwarf;
22551
22552 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22553 if (REGNO (base_reg) == IP_REGNUM)
22554 {
22555 RTX_FRAME_RELATED_P (par) = 1;
22556 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22557 }
22558 else
22559 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22560 base_reg, base_reg);
22561 }
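
/* For example, restoring 20 consecutive D registers cannot be done with a
   single pop_multi, so the recursion above emits one pop of 16 registers,
   which also advances the base register by 128 bytes, followed by a second
   pop of the remaining 4 registers. */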
22562
22563 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22564 even number of registers is being popped, multiple LDRD patterns are created
22565 for all register pairs. If an odd number of registers is popped, the last
22566 register is loaded using an LDR pattern. */
22567 static void
22568 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22569 {
22570 int num_regs = 0;
22571 int i, j;
22572 rtx par = NULL_RTX;
22573 rtx dwarf = NULL_RTX;
22574 rtx tmp, reg, tmp1;
22575 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22576
22577 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22578 if (saved_regs_mask & (1 << i))
22579 num_regs++;
22580
22581 gcc_assert (num_regs && num_regs <= 16);
22582
22583 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22584 to be popped. So, if num_regs is even, now it will become odd,
22585 and we can generate pop with PC. If num_regs is odd, it will be
22586 even now, and ldr with return can be generated for PC. */
22587 if (return_in_pc)
22588 num_regs--;
22589
22590 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22591
22592 /* Var j iterates over all the registers to gather all the registers in
22593 saved_regs_mask. Var i gives the index of saved registers in the stack
22594 frame. A PARALLEL RTX of a register pair is created here, so that the
22595 pattern for LDRD can be matched. As PC is always the last register to be
22596 popped, and we have already decremented num_regs if PC is in the mask,
22597 we don't have to worry about PC in this loop. */
22598 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22599 if (saved_regs_mask & (1 << j))
22600 {
22601 /* Create RTX for memory load. */
22602 reg = gen_rtx_REG (SImode, j);
22603 tmp = gen_rtx_SET (reg,
22604 gen_frame_mem (SImode,
22605 plus_constant (Pmode,
22606 stack_pointer_rtx, 4 * i)));
22607 RTX_FRAME_RELATED_P (tmp) = 1;
22608
22609 if (i % 2 == 0)
22610 {
22611 /* When saved-register index (i) is even, the RTX to be emitted is
22612 yet to be created. Hence create it first. The LDRD pattern we
22613 are generating is :
22614 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22615 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22616 where target registers need not be consecutive. */
22617 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22618 dwarf = NULL_RTX;
22619 }
22620
22621 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22622 added as 0th element and if i is odd, reg_i is added as 1st element
22623 of LDRD pattern shown above. */
22624 XVECEXP (par, 0, (i % 2)) = tmp;
22625 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22626
22627 if ((i % 2) == 1)
22628 {
22629 /* When saved-register index (i) is odd, RTXs for both the registers
22630 to be loaded are generated in above given LDRD pattern, and the
22631 pattern can be emitted now. */
22632 par = emit_insn (par);
22633 REG_NOTES (par) = dwarf;
22634 RTX_FRAME_RELATED_P (par) = 1;
22635 }
22636
22637 i++;
22638 }
22639
22640 /* If the number of registers popped is odd AND return_in_pc is false, OR the
22641 number of registers is even AND return_in_pc is true, the last register is
22642 popped using LDR. It can be PC as well. Hence, adjust the stack first and
22643 then LDR with post increment. */
22644
22645 /* Increment the stack pointer, based on there being
22646 num_regs 4-byte registers to restore. */
22647 tmp = gen_rtx_SET (stack_pointer_rtx,
22648 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22649 RTX_FRAME_RELATED_P (tmp) = 1;
22650 tmp = emit_insn (tmp);
22651 if (!return_in_pc)
22652 {
22653 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22654 stack_pointer_rtx, stack_pointer_rtx);
22655 }
22656
22657 dwarf = NULL_RTX;
22658
22659 if (((num_regs % 2) == 1 && !return_in_pc)
22660 || ((num_regs % 2) == 0 && return_in_pc))
22661 {
22662 /* Scan for the single register to be popped. Skip until the saved
22663 register is found. */
22664 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22665
22666 /* Gen LDR with post increment here. */
22667 tmp1 = gen_rtx_MEM (SImode,
22668 gen_rtx_POST_INC (SImode,
22669 stack_pointer_rtx));
22670 set_mem_alias_set (tmp1, get_frame_alias_set ());
22671
22672 reg = gen_rtx_REG (SImode, j);
22673 tmp = gen_rtx_SET (reg, tmp1);
22674 RTX_FRAME_RELATED_P (tmp) = 1;
22675 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22676
22677 if (return_in_pc)
22678 {
22679 /* If return_in_pc, j must be PC_REGNUM. */
22680 gcc_assert (j == PC_REGNUM);
22681 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22682 XVECEXP (par, 0, 0) = ret_rtx;
22683 XVECEXP (par, 0, 1) = tmp;
22684 par = emit_jump_insn (par);
22685 }
22686 else
22687 {
22688 par = emit_insn (tmp);
22689 REG_NOTES (par) = dwarf;
22690 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22691 stack_pointer_rtx, stack_pointer_rtx);
22692 }
22693
22694 }
22695 else if ((num_regs % 2) == 1 && return_in_pc)
22696 {
22697 /* There are 2 registers to be popped. So, generate the pattern
22698 pop_multiple_with_stack_update_and_return to pop in PC. */
22699 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22700 }
22701
22702 return;
22703 }
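
/* A sketch of the effect for a hypothetical mask of {r4, r5, r6} with no
   PC in the list:

	ldrd	r4, r5, [sp]		@ one LDRD pattern for the pair
	add	sp, sp, #8
	ldr	r6, [sp], #4		@ odd register, with post-increment

   (In Thumb-2 the two LDRD targets need not be consecutive registers.) */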
22704
22705 /* LDRD in ARM mode needs consecutive registers as operands. This function
22706 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22707 offset addressing and then generates one separate stack update. This provides
22708 more scheduling freedom, compared to writeback on every load. However,
22709 if the function returns using load into PC directly
22710 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22711 before the last load. TODO: Add a peephole optimization to recognize
22712 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22713 peephole optimization to merge the load at stack-offset zero
22714 with the stack update instruction using load with writeback
22715 in post-index addressing mode. */
22716 static void
22717 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22718 {
22719 int j = 0;
22720 int offset = 0;
22721 rtx par = NULL_RTX;
22722 rtx dwarf = NULL_RTX;
22723 rtx tmp, mem;
22724
22725 /* Restore saved registers. */
22726 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22727 j = 0;
22728 while (j <= LAST_ARM_REGNUM)
22729 if (saved_regs_mask & (1 << j))
22730 {
22731 if ((j % 2) == 0
22732 && (saved_regs_mask & (1 << (j + 1)))
22733 && (j + 1) != PC_REGNUM)
22734 {
22735 /* Current register and next register form register pair for which
22736 LDRD can be generated. PC is always the last register popped, and
22737 we handle it separately. */
22738 if (offset > 0)
22739 mem = gen_frame_mem (DImode,
22740 plus_constant (Pmode,
22741 stack_pointer_rtx,
22742 offset));
22743 else
22744 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22745
22746 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22747 tmp = emit_insn (tmp);
22748 RTX_FRAME_RELATED_P (tmp) = 1;
22749
22750 /* Generate dwarf info. */
22751
22752 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22753 gen_rtx_REG (SImode, j),
22754 NULL_RTX);
22755 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22756 gen_rtx_REG (SImode, j + 1),
22757 dwarf);
22758
22759 REG_NOTES (tmp) = dwarf;
22760
22761 offset += 8;
22762 j += 2;
22763 }
22764 else if (j != PC_REGNUM)
22765 {
22766 /* Emit a single word load. */
22767 if (offset > 0)
22768 mem = gen_frame_mem (SImode,
22769 plus_constant (Pmode,
22770 stack_pointer_rtx,
22771 offset));
22772 else
22773 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22774
22775 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22776 tmp = emit_insn (tmp);
22777 RTX_FRAME_RELATED_P (tmp) = 1;
22778
22779 /* Generate dwarf info. */
22780 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22781 gen_rtx_REG (SImode, j),
22782 NULL_RTX);
22783
22784 offset += 4;
22785 j += 1;
22786 }
22787 else /* j == PC_REGNUM */
22788 j++;
22789 }
22790 else
22791 j++;
22792
22793 /* Update the stack. */
22794 if (offset > 0)
22795 {
22796 tmp = gen_rtx_SET (stack_pointer_rtx,
22797 plus_constant (Pmode,
22798 stack_pointer_rtx,
22799 offset));
22800 tmp = emit_insn (tmp);
22801 arm_add_cfa_adjust_cfa_note (tmp, offset,
22802 stack_pointer_rtx, stack_pointer_rtx);
22803 offset = 0;
22804 }
22805
22806 if (saved_regs_mask & (1 << PC_REGNUM))
22807 {
22808 /* Only PC is to be popped. */
22809 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22810 XVECEXP (par, 0, 0) = ret_rtx;
22811 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22812 gen_frame_mem (SImode,
22813 gen_rtx_POST_INC (SImode,
22814 stack_pointer_rtx)));
22815 RTX_FRAME_RELATED_P (tmp) = 1;
22816 XVECEXP (par, 0, 1) = tmp;
22817 par = emit_jump_insn (par);
22818
22819 /* Generate dwarf info. */
22820 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22821 gen_rtx_REG (SImode, PC_REGNUM),
22822 NULL_RTX);
22823 REG_NOTES (par) = dwarf;
22824 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22825 stack_pointer_rtx, stack_pointer_rtx);
22826 }
22827 }
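
/* A sketch of the effect for a hypothetical mask of {r4, r5, r6, pc}:

	ldrd	r4, r5, [sp]		@ consecutive even/odd pair
	ldr	r6, [sp, #8]		@ no partner for r6, single-word load
	add	sp, sp, #12
	ldr	pc, [sp], #4		@ final load doubles as the return  */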
22828
22829 /* Calculate the size of the return value that is passed in registers. */
22830 static unsigned
22831 arm_size_return_regs (void)
22832 {
22833 machine_mode mode;
22834
22835 if (crtl->return_rtx != 0)
22836 mode = GET_MODE (crtl->return_rtx);
22837 else
22838 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22839
22840 return GET_MODE_SIZE (mode);
22841 }
22842
22843 /* Return true if the current function needs to save/restore LR. */
22844 static bool
22845 thumb_force_lr_save (void)
22846 {
22847 return !cfun->machine->lr_save_eliminated
22848 && (!crtl->is_leaf
22849 || thumb_far_jump_used_p ()
22850 || df_regs_ever_live_p (LR_REGNUM));
22851 }
22852
22853 /* We do not know whether r3 will be available, because
22854 there is an indirect tailcall happening in this
22855 particular case. */
22856 static bool
22857 is_indirect_tailcall_p (rtx call)
22858 {
22859 rtx pat = PATTERN (call);
22860
22861 /* Indirect tail call. */
22862 pat = XVECEXP (pat, 0, 0);
22863 if (GET_CODE (pat) == SET)
22864 pat = SET_SRC (pat);
22865
22866 pat = XEXP (XEXP (pat, 0), 0);
22867 return REG_P (pat);
22868 }
22869
22870 /* Return true if r3 is used by any of the tail call insns in the
22871 current function. */
22872 static bool
22873 any_sibcall_could_use_r3 (void)
22874 {
22875 edge_iterator ei;
22876 edge e;
22877
22878 if (!crtl->tail_call_emit)
22879 return false;
22880 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22881 if (e->flags & EDGE_SIBCALL)
22882 {
22883 rtx_insn *call = BB_END (e->src);
22884 if (!CALL_P (call))
22885 call = prev_nonnote_nondebug_insn (call);
22886 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22887 if (find_regno_fusage (call, USE, 3)
22888 || is_indirect_tailcall_p (call))
22889 return true;
22890 }
22891 return false;
22892 }
22893
22894
22895 /* Compute the distance from register FROM to register TO.
22896 These can be the arg pointer (26), the soft frame pointer (25),
22897 the stack pointer (13) or the hard frame pointer (11).
22898 In thumb mode r7 is used as the soft frame pointer, if needed.
22899 Typical stack layout looks like this:
22900
22901 old stack pointer -> | |
22902 ----
22903 | | \
22904 | | saved arguments for
22905 | | vararg functions
22906 | | /
22907 --
22908 hard FP & arg pointer -> | | \
22909 | | stack
22910 | | frame
22911 | | /
22912 --
22913 | | \
22914 | | call saved
22915 | | registers
22916 soft frame pointer -> | | /
22917 --
22918 | | \
22919 | | local
22920 | | variables
22921 locals base pointer -> | | /
22922 --
22923 | | \
22924 | | outgoing
22925 | | arguments
22926 current stack pointer -> | | /
22927 --
22928
22929 For a given function some or all of these stack components
22930 may not be needed, giving rise to the possibility of
22931 eliminating some of the registers.
22932
22933 The values returned by this function must reflect the behavior
22934 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22935
22936 The sign of the number returned reflects the direction of stack
22937 growth, so the values are positive for all eliminations except
22938 from the soft frame pointer to the hard frame pointer.
22939
22940 SFP may point just inside the local variables block to ensure correct
22941 alignment. */
22942
22943
22944 /* Return cached stack offsets. */
22945
22946 static arm_stack_offsets *
22947 arm_get_frame_offsets (void)
22948 {
22949 struct arm_stack_offsets *offsets;
22950
22951 offsets = &cfun->machine->stack_offsets;
22952
22953 return offsets;
22954 }
22955
22956
22957 /* Calculate stack offsets. These are used to calculate register elimination
22958 offsets and in prologue/epilogue code. Also calculates which registers
22959 should be saved. */
22960
22961 static void
22962 arm_compute_frame_layout (void)
22963 {
22964 struct arm_stack_offsets *offsets;
22965 unsigned long func_type;
22966 int saved;
22967 int core_saved;
22968 HOST_WIDE_INT frame_size;
22969 int i;
22970
22971 offsets = &cfun->machine->stack_offsets;
22972
22973 /* Initially this is the size of the local variables. It will be translated
22974 into an offset once we have determined the size of preceding data. */
22975 frame_size = ROUND_UP_WORD (get_frame_size ());
22976
22977 /* Space for variadic functions. */
22978 offsets->saved_args = crtl->args.pretend_args_size;
22979
22980 /* In Thumb mode this is incorrect, but never used. */
22981 offsets->frame
22982 = (offsets->saved_args
22983 + arm_compute_static_chain_stack_bytes ()
22984 + (frame_pointer_needed ? 4 : 0));
22985
22986 if (TARGET_32BIT)
22987 {
22988 unsigned int regno;
22989
22990 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22991 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22992 saved = core_saved;
22993
22994 /* We know that SP will be doubleword aligned on entry, and we must
22995 preserve that condition at any subroutine call. We also require the
22996 soft frame pointer to be doubleword aligned. */
22997
22998 if (TARGET_REALLY_IWMMXT)
22999 {
23000 /* Check for the call-saved iWMMXt registers. */
23001 for (regno = FIRST_IWMMXT_REGNUM;
23002 regno <= LAST_IWMMXT_REGNUM;
23003 regno++)
23004 if (reg_needs_saving_p (regno))
23005 saved += 8;
23006 }
23007
23008 func_type = arm_current_func_type ();
23009 /* Space for saved VFP registers. */
23010 if (! IS_VOLATILE (func_type)
23011 && TARGET_VFP_BASE)
23012 saved += arm_get_vfp_saved_size ();
23013
23014 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23015 nonsecure entry functions with VSTR/VLDR. */
23016 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23017 saved += 4;
23018 }
23019 else /* TARGET_THUMB1 */
23020 {
23021 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23022 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23023 saved = core_saved;
23024 if (TARGET_BACKTRACE)
23025 saved += 16;
23026 }
23027
23028 /* Saved registers include the stack frame. */
23029 offsets->saved_regs
23030 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23031 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23032
23033 /* A leaf function does not need any stack alignment if it has nothing
23034 on the stack. */
23035 if (crtl->is_leaf && frame_size == 0
23036 /* However if it calls alloca(), we have a dynamically allocated
23037 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23038 && ! cfun->calls_alloca)
23039 {
23040 offsets->outgoing_args = offsets->soft_frame;
23041 offsets->locals_base = offsets->soft_frame;
23042 return;
23043 }
23044
23045 /* Ensure SFP has the correct alignment. */
23046 if (ARM_DOUBLEWORD_ALIGN
23047 && (offsets->soft_frame & 7))
23048 {
23049 offsets->soft_frame += 4;
23050 /* Try to align stack by pushing an extra reg. Don't bother doing this
23051 when there is a stack frame as the alignment will be rolled into
23052 the normal stack adjustment. */
23053 if (frame_size + crtl->outgoing_args_size == 0)
23054 {
23055 int reg = -1;
23056
23057 /* Register r3 is caller-saved. Normally it does not need to be
23058 saved on entry by the prologue. However if we choose to save
23059 it for padding then we may confuse the compiler into thinking
23060 a prologue sequence is required when in fact it is not. This
23061 will occur when shrink-wrapping if r3 is used as a scratch
23062 register and there are no other callee-saved writes.
23063
23064 This situation can be avoided when other callee-saved registers
23065 are available: r3 is not mandatory, so we can choose a callee-saved
23066 register for padding instead. */
23067 bool prefer_callee_reg_p = false;
23068
23069 /* If it is safe to use r3, then do so. This sometimes
23070 generates better code on Thumb-2 by avoiding the need to
23071 use 32-bit push/pop instructions. */
23072 if (! any_sibcall_could_use_r3 ()
23073 && arm_size_return_regs () <= 12
23074 && (offsets->saved_regs_mask & (1 << 3)) == 0
23075 && (TARGET_THUMB2
23076 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23077 {
23078 reg = 3;
23079 if (!TARGET_THUMB2)
23080 prefer_callee_reg_p = true;
23081 }
23082 if (reg == -1
23083 || prefer_callee_reg_p)
23084 {
23085 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23086 {
23087 /* Avoid fixed registers; they may be changed at
23088 arbitrary times so it's unsafe to restore them
23089 during the epilogue. */
23090 if (!fixed_regs[i]
23091 && (offsets->saved_regs_mask & (1 << i)) == 0)
23092 {
23093 reg = i;
23094 break;
23095 }
23096 }
23097 }
23098
23099 if (reg != -1)
23100 {
23101 offsets->saved_regs += 4;
23102 offsets->saved_regs_mask |= (1 << reg);
23103 }
23104 }
23105 }
23106
23107 offsets->locals_base = offsets->soft_frame + frame_size;
23108 offsets->outgoing_args = (offsets->locals_base
23109 + crtl->outgoing_args_size);
23110
23111 if (ARM_DOUBLEWORD_ALIGN)
23112 {
23113 /* Ensure SP remains doubleword aligned. */
23114 if (offsets->outgoing_args & 7)
23115 offsets->outgoing_args += 4;
23116 gcc_assert (!(offsets->outgoing_args & 7));
23117 }
23118 }
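
/* An illustrative worked example of the layout computed above (values are
   assumed, not taken from a real compilation): an ARM function that saves
   {r4, r5, lr} (12 bytes), has 16 bytes of locals and 8 bytes of outgoing
   arguments, with no static chain and no interworking slot, gives

     saved_args    = 0
     saved_regs    = 0 + 0 + 12 = 12
     soft_frame    = 12         -> padded to 16 for doubleword alignment
     locals_base   = 16 + 16    = 32
     outgoing_args = 32 + 8     = 40  (already 8-byte aligned)

   Because frame_size + outgoing_args_size is non-zero, no extra register
   is pushed for the padding; the 4 bytes are folded into the normal stack
   adjustment.  */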
23119
23120
23121 /* Calculate the relative offsets for the different stack pointers. Positive
23122 offsets are in the direction of stack growth. */
23123
23124 HOST_WIDE_INT
23125 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23126 {
23127 arm_stack_offsets *offsets;
23128
23129 offsets = arm_get_frame_offsets ();
23130
23131 /* OK, now we have enough information to compute the distances.
23132 There must be an entry in these switch tables for each pair
23133 of registers in ELIMINABLE_REGS, even if some of the entries
23134 seem to be redundant or useless. */
23135 switch (from)
23136 {
23137 case ARG_POINTER_REGNUM:
23138 switch (to)
23139 {
23140 case THUMB_HARD_FRAME_POINTER_REGNUM:
23141 return 0;
23142
23143 case FRAME_POINTER_REGNUM:
23144 /* This is the reverse of the soft frame pointer
23145 to hard frame pointer elimination below. */
23146 return offsets->soft_frame - offsets->saved_args;
23147
23148 case ARM_HARD_FRAME_POINTER_REGNUM:
23149 /* This is only non-zero in the case where the static chain register
23150 is stored above the frame. */
23151 return offsets->frame - offsets->saved_args - 4;
23152
23153 case STACK_POINTER_REGNUM:
23154 /* If nothing has been pushed on the stack at all
23155 then this will return -4. This *is* correct! */
23156 return offsets->outgoing_args - (offsets->saved_args + 4);
23157
23158 default:
23159 gcc_unreachable ();
23160 }
23161 gcc_unreachable ();
23162
23163 case FRAME_POINTER_REGNUM:
23164 switch (to)
23165 {
23166 case THUMB_HARD_FRAME_POINTER_REGNUM:
23167 return 0;
23168
23169 case ARM_HARD_FRAME_POINTER_REGNUM:
23170 /* The hard frame pointer points to the top entry in the
23171 stack frame. The soft frame pointer to the bottom entry
23172 in the stack frame. If there is no stack frame at all,
23173 then they are identical. */
23174
23175 return offsets->frame - offsets->soft_frame;
23176
23177 case STACK_POINTER_REGNUM:
23178 return offsets->outgoing_args - offsets->soft_frame;
23179
23180 default:
23181 gcc_unreachable ();
23182 }
23183 gcc_unreachable ();
23184
23185 default:
23186 /* You cannot eliminate from the stack pointer.
23187 In theory you could eliminate from the hard frame
23188 pointer to the stack pointer, but this will never
23189 happen, since if a stack frame is not needed the
23190 hard frame pointer will never be used. */
23191 gcc_unreachable ();
23192 }
23193 }
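
/* Continuing the illustrative layout sketched above (saved_args = 0,
   soft_frame = 16, outgoing_args = 40; values assumed for illustration),
   the eliminations computed here would be:

     ARG_POINTER   -> FRAME_POINTER : soft_frame - saved_args          = 16
     ARG_POINTER   -> STACK_POINTER : outgoing_args - (saved_args + 4) = 36
     FRAME_POINTER -> STACK_POINTER : outgoing_args - soft_frame       = 24  */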
23194
23195 /* Given FROM and TO register numbers, say whether this elimination is
23196 allowed. Frame pointer elimination is automatically handled.
23197
23198 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23199 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23200 pointer, we must eliminate FRAME_POINTER_REGNUM into
23201 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23202 ARG_POINTER_REGNUM. */
23203
23204 bool
23205 arm_can_eliminate (const int from, const int to)
23206 {
23207 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23208 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23209 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23210 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23211 true);
23212 }
23213
23214 /* Emit RTL to save coprocessor registers on function entry. Returns the
23215 number of bytes pushed. */
23216
23217 static int
23218 arm_save_coproc_regs(void)
23219 {
23220 int saved_size = 0;
23221 unsigned reg;
23222 unsigned start_reg;
23223 rtx insn;
23224
23225 if (TARGET_REALLY_IWMMXT)
23226 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23227 if (reg_needs_saving_p (reg))
23228 {
23229 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23230 insn = gen_rtx_MEM (V2SImode, insn);
23231 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23232 RTX_FRAME_RELATED_P (insn) = 1;
23233 saved_size += 8;
23234 }
23235
23236 if (TARGET_VFP_BASE)
23237 {
23238 start_reg = FIRST_VFP_REGNUM;
23239
23240 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23241 {
23242 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23243 {
23244 if (start_reg != reg)
23245 saved_size += vfp_emit_fstmd (start_reg,
23246 (reg - start_reg) / 2);
23247 start_reg = reg + 2;
23248 }
23249 }
23250 if (start_reg != reg)
23251 saved_size += vfp_emit_fstmd (start_reg,
23252 (reg - start_reg) / 2);
23253 }
23254 return saved_size;
23255 }
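
/* An illustrative note on the VFP loop above (register choices assumed):
   contiguous runs of D registers are batched into a single store-multiple.
   If, say, d8-d10 need saving but d11-d13 do not, the run is flushed as
   roughly vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 3), i.e. one push of
   d8, d9 and d10; a later disjoint run such as d14-d15 gets its own
   store-multiple.  */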
23256
23257
23258 /* Set the Thumb frame pointer from the stack pointer. */
23259
23260 static void
23261 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23262 {
23263 HOST_WIDE_INT amount;
23264 rtx insn, dwarf;
23265
23266 amount = offsets->outgoing_args - offsets->locals_base;
23267 if (amount < 1024)
23268 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23269 stack_pointer_rtx, GEN_INT (amount)));
23270 else
23271 {
23272 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23273 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23274 expects the first two operands to be the same. */
23275 if (TARGET_THUMB2)
23276 {
23277 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23278 stack_pointer_rtx,
23279 hard_frame_pointer_rtx));
23280 }
23281 else
23282 {
23283 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23284 hard_frame_pointer_rtx,
23285 stack_pointer_rtx));
23286 }
23287 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23288 plus_constant (Pmode, stack_pointer_rtx, amount));
23289 RTX_FRAME_RELATED_P (dwarf) = 1;
23290 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23291 }
23292
23293 RTX_FRAME_RELATED_P (insn) = 1;
23294 }
23295
23296 struct scratch_reg {
23297 rtx reg;
23298 bool saved;
23299 };
23300
23301 /* Return a short-lived scratch register for use as a 2nd scratch register on
23302 function entry after the registers are saved in the prologue. This register
23303 must be released by means of release_scratch_register_on_entry. IP is not
23304 considered since it is always used as the 1st scratch register if available.
23305
23306 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23307 mask of live registers. */
23308
23309 static void
23310 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23311 unsigned long live_regs)
23312 {
23313 int regno = -1;
23314
23315 sr->saved = false;
23316
23317 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23318 regno = LR_REGNUM;
23319 else
23320 {
23321 unsigned int i;
23322
23323 for (i = 4; i < 11; i++)
23324 if (regno1 != i && (live_regs & (1 << i)) != 0)
23325 {
23326 regno = i;
23327 break;
23328 }
23329
23330 if (regno < 0)
23331 {
23332 /* If IP is used as the 1st scratch register for a nested function,
23333 then either r3 wasn't available or is used to preserve IP. */
23334 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23335 regno1 = 3;
23336 regno = (regno1 == 3 ? 2 : 3);
23337 sr->saved
23338 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23339 regno);
23340 }
23341 }
23342
23343 sr->reg = gen_rtx_REG (SImode, regno);
23344 if (sr->saved)
23345 {
23346 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23347 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23348 rtx x = gen_rtx_SET (stack_pointer_rtx,
23349 plus_constant (Pmode, stack_pointer_rtx, -4));
23350 RTX_FRAME_RELATED_P (insn) = 1;
23351 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23352 }
23353 }
23354
23355 /* Release a scratch register obtained from the preceding function. */
23356
23357 static void
23358 release_scratch_register_on_entry (struct scratch_reg *sr)
23359 {
23360 if (sr->saved)
23361 {
23362 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23363 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23364 rtx x = gen_rtx_SET (stack_pointer_rtx,
23365 plus_constant (Pmode, stack_pointer_rtx, 4));
23366 RTX_FRAME_RELATED_P (insn) = 1;
23367 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23368 }
23369 }
23370
23371 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23372
23373 #if PROBE_INTERVAL > 4096
23374 #error Cannot use indexed addressing mode for stack probing
23375 #endif
23376
23377 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23378 inclusive. These are offsets from the current stack pointer. REGNO1
23379 is the index number of the 1st scratch register and LIVE_REGS is the
23380 mask of live registers. */
23381
23382 static void
23383 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23384 unsigned int regno1, unsigned long live_regs)
23385 {
23386 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23387
23388 /* See if we have a constant small number of probes to generate. If so,
23389 that's the easy case. */
23390 if (size <= PROBE_INTERVAL)
23391 {
23392 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23393 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23394 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23395 }
23396
23397 /* The run-time loop is made up of 10 insns in the generic case while the
23398 unrolled compile-time sequence is made up of 4+2*(n-2) insns for n intervals. */
23399 else if (size <= 5 * PROBE_INTERVAL)
23400 {
23401 HOST_WIDE_INT i, rem;
23402
23403 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23404 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23405 emit_stack_probe (reg1);
23406
23407 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23408 it exceeds SIZE. If only two probes are needed, this will not
23409 generate any code. Then probe at FIRST + SIZE. */
23410 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23411 {
23412 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23413 emit_stack_probe (reg1);
23414 }
23415
23416 rem = size - (i - PROBE_INTERVAL);
23417 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23418 {
23419 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23420 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23421 }
23422 else
23423 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23424 }
23425
23426 /* Otherwise, do the same as above, but in a loop. Note that we must be
23427 extra careful with variables wrapping around because we might be at
23428 the very top (or the very bottom) of the address space and we have
23429 to be able to handle this case properly; in particular, we use an
23430 equality test for the loop condition. */
23431 else
23432 {
23433 HOST_WIDE_INT rounded_size;
23434 struct scratch_reg sr;
23435
23436 get_scratch_register_on_entry (&sr, regno1, live_regs);
23437
23438 emit_move_insn (reg1, GEN_INT (first));
23439
23440
23441 /* Step 1: round SIZE to the previous multiple of the interval. */
23442
23443 rounded_size = size & -PROBE_INTERVAL;
23444 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23445
23446
23447 /* Step 2: compute initial and final value of the loop counter. */
23448
23449 /* TEST_ADDR = SP + FIRST. */
23450 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23451
23452 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23453 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23454
23455
23456 /* Step 3: the loop
23457
23458 do
23459 {
23460 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23461 probe at TEST_ADDR
23462 }
23463 while (TEST_ADDR != LAST_ADDR)
23464
23465 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23466 until it is equal to ROUNDED_SIZE. */
23467
23468 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23469
23470
23471 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23472 that SIZE is equal to ROUNDED_SIZE. */
23473
23474 if (size != rounded_size)
23475 {
23476 HOST_WIDE_INT rem = size - rounded_size;
23477
23478 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23479 {
23480 emit_set_insn (sr.reg,
23481 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23482 emit_stack_probe (plus_constant (Pmode, sr.reg,
23483 PROBE_INTERVAL - rem));
23484 }
23485 else
23486 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23487 }
23488
23489 release_scratch_register_on_entry (&sr);
23490 }
23491
23492 /* Make sure nothing is scheduled before we are done. */
23493 emit_insn (gen_blockage ());
23494 }
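
/* An illustrative worked example, assuming PROBE_INTERVAL is 4096 and
   taking FIRST = 4096, SIZE = 10000 (values assumed): the middle case
   above probes at SP - 8192 (FIRST + 4096), SP - 12288 (FIRST + 8192)
   and, via the remainder path, SP - 14096 (FIRST + SIZE).  Only when
   SIZE exceeds 5 * PROBE_INTERVAL is the run-time loop form used.  */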
23495
23496 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23497 absolute addresses. */
23498
23499 const char *
23500 output_probe_stack_range (rtx reg1, rtx reg2)
23501 {
23502 static int labelno = 0;
23503 char loop_lab[32];
23504 rtx xops[2];
23505
23506 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23507
23508 /* Loop. */
23509 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23510
23511 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23512 xops[0] = reg1;
23513 xops[1] = GEN_INT (PROBE_INTERVAL);
23514 output_asm_insn ("sub\t%0, %0, %1", xops);
23515
23516 /* Probe at TEST_ADDR. */
23517 output_asm_insn ("str\tr0, [%0, #0]", xops);
23518
23519 /* Test if TEST_ADDR == LAST_ADDR. */
23520 xops[1] = reg2;
23521 output_asm_insn ("cmp\t%0, %1", xops);
23522
23523 /* Branch. */
23524 fputs ("\tbne\t", asm_out_file);
23525 assemble_name_raw (asm_out_file, loop_lab);
23526 fputc ('\n', asm_out_file);
23527
23528 return "";
23529 }
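
/* A sketch of the loop emitted by output_probe_stack_range (register
   numbers and the 4096-byte interval are assumed for illustration):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
*/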
23530
23531 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23532 function. */
23533 void
23534 arm_expand_prologue (void)
23535 {
23536 rtx amount;
23537 rtx insn;
23538 rtx ip_rtx;
23539 unsigned long live_regs_mask;
23540 unsigned long func_type;
23541 int fp_offset = 0;
23542 int saved_pretend_args = 0;
23543 int saved_regs = 0;
23544 unsigned HOST_WIDE_INT args_to_push;
23545 HOST_WIDE_INT size;
23546 arm_stack_offsets *offsets;
23547 bool clobber_ip;
23548
23549 func_type = arm_current_func_type ();
23550
23551 /* Naked functions don't have prologues. */
23552 if (IS_NAKED (func_type))
23553 {
23554 if (flag_stack_usage_info)
23555 current_function_static_stack_size = 0;
23556 return;
23557 }
23558
23559 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23560 args_to_push = crtl->args.pretend_args_size;
23561
23562 /* Compute which register we will have to save onto the stack. */
23563 offsets = arm_get_frame_offsets ();
23564 live_regs_mask = offsets->saved_regs_mask;
23565
23566 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23567
23568 if (IS_STACKALIGN (func_type))
23569 {
23570 rtx r0, r1;
23571
23572 /* Handle a word-aligned stack pointer. We generate the following:
23573
23574 mov r0, sp
23575 bic r1, r0, #7
23576 mov sp, r1
23577 <save and restore r0 in normal prologue/epilogue>
23578 mov sp, r0
23579 bx lr
23580
23581 The unwinder doesn't need to know about the stack realignment.
23582 Just tell it we saved SP in r0. */
23583 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23584
23585 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23586 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23587
23588 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23589 RTX_FRAME_RELATED_P (insn) = 1;
23590 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23591
23592 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23593
23594 /* ??? The CFA changes here, which may cause GDB to conclude that it
23595 has entered a different function. That said, the unwind info is
23596 correct, individually, before and after this instruction because
23597 we've described the save of SP, which will override the default
23598 handling of SP as restoring from the CFA. */
23599 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23600 }
23601
23602 /* Let's compute the static_chain_stack_bytes required and store it. Right
23603 now the value must be -1 as stored by arm_init_machine_status (). */
23604 cfun->machine->static_chain_stack_bytes
23605 = arm_compute_static_chain_stack_bytes ();
23606
23607 /* The static chain register is the same as the IP register. If it is
23608 clobbered when creating the frame, we need to save and restore it. */
23609 clobber_ip = (IS_NESTED (func_type)
23610 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23611 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23612 || flag_stack_clash_protection)
23613 && !df_regs_ever_live_p (LR_REGNUM)
23614 && arm_r3_live_at_start_p ()))
23615 || arm_current_function_pac_enabled_p ()));
23616
23617 /* Find somewhere to store IP whilst the frame is being created.
23618 We try the following places in order:
23619
23620 1. The last argument register r3 if it is available.
23621 2. A slot on the stack above the frame if there are no
23622 arguments to push onto the stack.
23623 3. Register r3 again, after pushing the argument registers
23624 onto the stack, if this is a varargs function.
23625 4. The last slot on the stack created for the arguments to
23626 push, if this isn't a varargs function.
23627
23628 Note - we only need to tell the dwarf2 backend about the SP
23629 adjustment in the second variant; the static chain register
23630 doesn't need to be unwound, as it doesn't contain a value
23631 inherited from the caller. */
23632 if (clobber_ip)
23633 {
23634 if (!arm_r3_live_at_start_p ())
23635 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23636 else if (args_to_push == 0)
23637 {
23638 rtx addr, dwarf;
23639
23640 saved_regs += 4;
23641
23642 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23643 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23644 fp_offset = 4;
23645
23646 /* Just tell the dwarf backend that we adjusted SP. */
23647 dwarf = gen_rtx_SET (stack_pointer_rtx,
23648 plus_constant (Pmode, stack_pointer_rtx,
23649 -fp_offset));
23650 RTX_FRAME_RELATED_P (insn) = 1;
23651 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23652 if (arm_current_function_pac_enabled_p ())
23653 cfun->machine->pacspval_needed = 1;
23654 }
23655 else
23656 {
23657 /* Store the args on the stack. */
23658 if (cfun->machine->uses_anonymous_args)
23659 {
23660 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23661 (0xf0 >> (args_to_push / 4)) & 0xf);
23662 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23663 saved_pretend_args = 1;
23664 }
23665 else
23666 {
23667 rtx addr, dwarf;
23668
23669 if (args_to_push == 4)
23670 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23671 else
23672 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23673 plus_constant (Pmode,
23674 stack_pointer_rtx,
23675 -args_to_push));
23676
23677 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23678
23679 /* Just tell the dwarf backend that we adjusted SP. */
23680 dwarf = gen_rtx_SET (stack_pointer_rtx,
23681 plus_constant (Pmode, stack_pointer_rtx,
23682 -args_to_push));
23683 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23684 }
23685
23686 RTX_FRAME_RELATED_P (insn) = 1;
23687 fp_offset = args_to_push;
23688 args_to_push = 0;
23689 if (arm_current_function_pac_enabled_p ())
23690 cfun->machine->pacspval_needed = 1;
23691 }
23692 }
23693
23694 if (arm_current_function_pac_enabled_p ())
23695 {
23696 /* If IP was clobbered we only emit a PAC instruction as the BTI
23697 one will be added before the push of the clobbered IP (if
23698 necessary) by the bti pass. */
23699 if (aarch_bti_enabled () && !clobber_ip)
23700 insn = emit_insn (gen_pacbti_nop ());
23701 else
23702 insn = emit_insn (gen_pac_nop ());
23703
23704 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23705 RTX_FRAME_RELATED_P (insn) = 1;
23706 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23707 }
23708
23709 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23710 {
23711 if (IS_INTERRUPT (func_type))
23712 {
23713 /* Interrupt functions must not corrupt any registers.
23714 Creating a frame pointer however, corrupts the IP
23715 register, so we must push it first. */
23716 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23717
23718 /* Do not set RTX_FRAME_RELATED_P on this insn.
23719 The dwarf stack unwinding code only wants to see one
23720 stack decrement per function, and this is not it. If
23721 this instruction is labeled as being part of the frame
23722 creation sequence then dwarf2out_frame_debug_expr will
23723 die when it encounters the assignment of IP to FP
23724 later on, since the use of SP here establishes SP as
23725 the CFA register and not IP.
23726
23727 Anyway this instruction is not really part of the stack
23728 frame creation although it is part of the prologue. */
23729 }
23730
23731 insn = emit_set_insn (ip_rtx,
23732 plus_constant (Pmode, stack_pointer_rtx,
23733 fp_offset));
23734 RTX_FRAME_RELATED_P (insn) = 1;
23735 }
23736
23737 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23738 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23739 {
23740 saved_regs += 4;
23741 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23742 GEN_INT (FPCXTNS_ENUM)));
23743 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23744 plus_constant (Pmode, stack_pointer_rtx, -4));
23745 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23746 RTX_FRAME_RELATED_P (insn) = 1;
23747 }
23748
23749 if (args_to_push)
23750 {
23751 /* Push the argument registers, or reserve space for them. */
23752 if (cfun->machine->uses_anonymous_args)
23753 insn = emit_multi_reg_push
23754 ((0xf0 >> (args_to_push / 4)) & 0xf,
23755 (0xf0 >> (args_to_push / 4)) & 0xf);
23756 else
23757 insn = emit_insn
23758 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23759 GEN_INT (- args_to_push)));
23760 RTX_FRAME_RELATED_P (insn) = 1;
23761 }
23762
23763 /* If this is an interrupt service routine, and the link register
23764 is going to be pushed, and we're not generating the extra push
23765 of IP (needed when a frame pointer is required and the APCS frame
23766 layout is used), then subtracting four from LR now means that the
23767 function return can be done with a single instruction. */
23768 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23769 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23770 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23771 && TARGET_ARM)
23772 {
23773 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23774
23775 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23776 }
23777
23778 if (live_regs_mask)
23779 {
23780 unsigned long dwarf_regs_mask = live_regs_mask;
23781
23782 saved_regs += bit_count (live_regs_mask) * 4;
23783 if (optimize_size && !frame_pointer_needed
23784 && saved_regs == offsets->saved_regs - offsets->saved_args)
23785 {
23786 /* If no coprocessor registers are being pushed and we don't have
23787 to worry about a frame pointer then push extra registers to
23788 create the stack frame. This is done in a way that does not
23789 alter the frame layout, so is independent of the epilogue. */
23790 int n;
23791 int frame;
23792 n = 0;
23793 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23794 n++;
23795 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23796 if (frame && n * 4 >= frame)
23797 {
23798 n = frame / 4;
23799 live_regs_mask |= (1 << n) - 1;
23800 saved_regs += frame;
23801 }
23802 }
23803
23804 if (TARGET_LDRD
23805 && current_tune->prefer_ldrd_strd
23806 && !optimize_function_for_size_p (cfun))
23807 {
23808 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23809 if (TARGET_THUMB2)
23810 thumb2_emit_strd_push (live_regs_mask);
23811 else if (TARGET_ARM
23812 && !TARGET_APCS_FRAME
23813 && !IS_INTERRUPT (func_type))
23814 arm_emit_strd_push (live_regs_mask);
23815 else
23816 {
23817 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23818 RTX_FRAME_RELATED_P (insn) = 1;
23819 }
23820 }
23821 else
23822 {
23823 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23824 RTX_FRAME_RELATED_P (insn) = 1;
23825 }
23826 }
23827
23828 if (! IS_VOLATILE (func_type))
23829 saved_regs += arm_save_coproc_regs ();
23830
23831 if (frame_pointer_needed && TARGET_ARM)
23832 {
23833 /* Create the new frame pointer. */
23834 if (TARGET_APCS_FRAME)
23835 {
23836 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23837 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23838 RTX_FRAME_RELATED_P (insn) = 1;
23839 }
23840 else
23841 {
23842 insn = GEN_INT (saved_regs - (4 + fp_offset));
23843 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23844 stack_pointer_rtx, insn));
23845 RTX_FRAME_RELATED_P (insn) = 1;
23846 }
23847 }
23848
23849 size = offsets->outgoing_args - offsets->saved_args;
23850 if (flag_stack_usage_info)
23851 current_function_static_stack_size = size;
23852
23853 /* If this isn't an interrupt service routine and we have a frame, then do
23854 stack checking. We use IP as the first scratch register, except for the
23855 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23856 if (!IS_INTERRUPT (func_type)
23857 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23858 || flag_stack_clash_protection))
23859 {
23860 unsigned int regno;
23861
23862 if (!IS_NESTED (func_type) || clobber_ip)
23863 regno = IP_REGNUM;
23864 else if (df_regs_ever_live_p (LR_REGNUM))
23865 regno = LR_REGNUM;
23866 else
23867 regno = 3;
23868
23869 if (crtl->is_leaf && !cfun->calls_alloca)
23870 {
23871 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23872 arm_emit_probe_stack_range (get_stack_check_protect (),
23873 size - get_stack_check_protect (),
23874 regno, live_regs_mask);
23875 }
23876 else if (size > 0)
23877 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23878 regno, live_regs_mask);
23879 }
23880
23881 /* Recover the static chain register. */
23882 if (clobber_ip)
23883 {
23884 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23885 insn = gen_rtx_REG (SImode, 3);
23886 else
23887 {
23888 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23889 insn = gen_frame_mem (SImode, insn);
23890 }
23891 emit_set_insn (ip_rtx, insn);
23892 emit_insn (gen_force_register_use (ip_rtx));
23893 }
23894
23895 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23896 {
23897 /* This add can produce multiple insns for a large constant, so we
23898 need to get tricky. */
23899 rtx_insn *last = get_last_insn ();
23900
23901 amount = GEN_INT (offsets->saved_args + saved_regs
23902 - offsets->outgoing_args);
23903
23904 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23905 amount));
23906 do
23907 {
23908 last = last ? NEXT_INSN (last) : get_insns ();
23909 RTX_FRAME_RELATED_P (last) = 1;
23910 }
23911 while (last != insn);
23912
23913 /* If the frame pointer is needed, emit a special barrier that
23914 will prevent the scheduler from moving stores to the frame
23915 before the stack adjustment. */
23916 if (frame_pointer_needed)
23917 emit_insn (gen_stack_tie (stack_pointer_rtx,
23918 hard_frame_pointer_rtx));
23919 }
23920
23921
23922 if (frame_pointer_needed && TARGET_THUMB2)
23923 thumb_set_frame_pointer (offsets);
23924
23925 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23926 {
23927 unsigned long mask;
23928
23929 mask = live_regs_mask;
23930 mask &= THUMB2_WORK_REGS;
23931 if (!IS_NESTED (func_type))
23932 mask |= (1 << IP_REGNUM);
23933 arm_load_pic_register (mask, NULL_RTX);
23934 }
23935
23936 /* If we are profiling, make sure no instructions are scheduled before
23937 the call to mcount. Similarly if the user has requested no
23938 scheduling in the prolog. Similarly if we want non-call exceptions
23939 using the EABI unwinder, to prevent faulting instructions from being
23940 swapped with a stack adjustment. */
23941 if (crtl->profile || !TARGET_SCHED_PROLOG
23942 || (arm_except_unwind_info (&global_options) == UI_TARGET
23943 && cfun->can_throw_non_call_exceptions))
23944 emit_insn (gen_blockage ());
23945
23946 /* If the link register is being kept alive, with the return address in it,
23947 then make sure that it does not get reused by the ce2 pass. */
23948 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23949 cfun->machine->lr_save_eliminated = 1;
23950 }
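
/* A minimal sketch of the kind of prologue the code above produces for a
   simple ARM function that saves r4 and lr and needs 8 bytes of locals
   (no frame pointer, no stack checking; the exact output depends on the
   target options and tuning, so treat this as assumed, not definitive):

	push	{r4, lr}
	sub	sp, sp, #8
*/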
23951 \f
23952 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23953 static void
23954 arm_print_condition (FILE *stream)
23955 {
23956 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23957 {
23958 /* Branch conversion is not implemented for Thumb-2. */
23959 if (TARGET_THUMB)
23960 {
23961 output_operand_lossage ("predicated Thumb instruction");
23962 return;
23963 }
23964 if (current_insn_predicate != NULL)
23965 {
23966 output_operand_lossage
23967 ("predicated instruction in conditional sequence");
23968 return;
23969 }
23970
23971 fputs (arm_condition_codes[arm_current_cc], stream);
23972 }
23973 else if (current_insn_predicate)
23974 {
23975 enum arm_cond_code code;
23976
23977 if (TARGET_THUMB1)
23978 {
23979 output_operand_lossage ("predicated Thumb instruction");
23980 return;
23981 }
23982
23983 code = get_arm_condition_code (current_insn_predicate);
23984 fputs (arm_condition_codes[code], stream);
23985 }
23986 }
23987
23988
23989 /* Globally reserved letters: acln
23990 Punctuation letters currently used: @_|?().!#
23991 Lower case letters currently used: bcdefhimpqtvwxyz
23992 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23993 Letters previously used, but now deprecated/obsolete: sWXYZ.
23994
23995 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23996
23997 If CODE is 'd', then the X is a condition operand and the instruction
23998 should only be executed if the condition is true.
23999 If CODE is 'D', then the X is a condition operand and the instruction
24000 should only be executed if the condition is false: however, if the mode
24001 of the comparison is CCFPEmode, then always execute the instruction -- we
24002 do this because in these circumstances !GE does not necessarily imply LT;
24003 in these cases the instruction pattern will take care to make sure that
24004 an instruction containing %d will follow, thereby undoing the effects of
24005 doing this instruction unconditionally.
24006 If CODE is 'N' then X is a floating point operand that must be negated
24007 before output.
24008 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24009 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24010 If CODE is 'V', then the operand must be a CONST_INT representing
24011 the bits to preserve in the modified register (Rd) of a BFI or BFC
24012 instruction: print out both the width and lsb (shift) fields. */
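/* A few illustrative expansions of the codes above (operands assumed):
   %c on (const_int 42) prints "42" (no '#'); %x on (const_int 255) prints
   "#0xff"; %B on (const_int 0) prints "-1"; %b on (const_int 8) prints
   "#3"; %M on r4 holding a DImode value prints "{r4-r5}".  */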
24013 static void
24014 arm_print_operand (FILE *stream, rtx x, int code)
24015 {
24016 switch (code)
24017 {
24018 case '@':
24019 fputs (ASM_COMMENT_START, stream);
24020 return;
24021
24022 case '_':
24023 fputs (user_label_prefix, stream);
24024 return;
24025
24026 case '|':
24027 fputs (REGISTER_PREFIX, stream);
24028 return;
24029
24030 case '?':
24031 arm_print_condition (stream);
24032 return;
24033
24034 case '.':
24035 /* The current condition code for a condition code setting instruction.
24036 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24037 fputc ('s', stream);
24038 arm_print_condition (stream);
24039 return;
24040
24041 case '!':
24042 /* If the instruction is conditionally executed then print
24043 the current condition code, otherwise print 's'. */
24044 gcc_assert (TARGET_THUMB2);
24045 if (current_insn_predicate)
24046 arm_print_condition (stream);
24047 else
24048 fputc ('s', stream);
24049 break;
24050
24051 /* %# is a "break" sequence. It doesn't output anything, but is used to
24052 separate e.g. operand numbers from following text, if that text consists
24053 of further digits which we don't want to be part of the operand
24054 number. */
24055 case '#':
24056 return;
24057
24058 case 'N':
24059 {
24060 REAL_VALUE_TYPE r;
24061 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24062 fprintf (stream, "%s", fp_const_from_val (&r));
24063 }
24064 return;
24065
24066 /* An integer or symbol address without a preceding # sign. */
24067 case 'c':
24068 switch (GET_CODE (x))
24069 {
24070 case CONST_INT:
24071 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24072 break;
24073
24074 case SYMBOL_REF:
24075 output_addr_const (stream, x);
24076 break;
24077
24078 case CONST:
24079 if (GET_CODE (XEXP (x, 0)) == PLUS
24080 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24081 {
24082 output_addr_const (stream, x);
24083 break;
24084 }
24085 /* Fall through. */
24086
24087 default:
24088 output_operand_lossage ("Unsupported operand for code '%c'", code);
24089 }
24090 return;
24091
24092 /* An integer that we want to print in HEX. */
24093 case 'x':
24094 switch (GET_CODE (x))
24095 {
24096 case CONST_INT:
24097 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24098 break;
24099
24100 default:
24101 output_operand_lossage ("Unsupported operand for code '%c'", code);
24102 }
24103 return;
24104
24105 case 'B':
24106 if (CONST_INT_P (x))
24107 {
24108 HOST_WIDE_INT val;
24109 val = ARM_SIGN_EXTEND (~INTVAL (x));
24110 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24111 }
24112 else
24113 {
24114 putc ('~', stream);
24115 output_addr_const (stream, x);
24116 }
24117 return;
24118
24119 case 'b':
24120 /* Print the log2 of a CONST_INT. */
24121 {
24122 HOST_WIDE_INT val;
24123
24124 if (!CONST_INT_P (x)
24125 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24126 output_operand_lossage ("Unsupported operand for code '%c'", code);
24127 else
24128 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24129 }
24130 return;
24131
24132 case 'L':
24133 /* The low 16 bits of an immediate constant. */
24134 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
24135 return;
24136
24137 case 'i':
24138 fprintf (stream, "%s", arithmetic_instr (x, 1));
24139 return;
24140
24141 case 'I':
24142 fprintf (stream, "%s", arithmetic_instr (x, 0));
24143 return;
24144
24145 case 'S':
24146 {
24147 HOST_WIDE_INT val;
24148 const char *shift;
24149
24150 shift = shift_op (x, &val);
24151
24152 if (shift)
24153 {
24154 fprintf (stream, ", %s ", shift);
24155 if (val == -1)
24156 arm_print_operand (stream, XEXP (x, 1), 0);
24157 else
24158 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24159 }
24160 }
24161 return;
24162
24163 /* An explanation of the 'Q', 'R' and 'H' register operands:
24164
24165 In a pair of registers containing a DI or DF value the 'Q'
24166 operand returns the register number of the register containing
24167 the least significant part of the value. The 'R' operand returns
24168 the register number of the register containing the most
24169 significant part of the value.
24170
24171 The 'H' operand returns the higher of the two register numbers.
24172 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
24173 same as the 'Q' operand, since the most significant part of the
24174 value is held in the lower number register. The reverse is true
24175 on systems where WORDS_BIG_ENDIAN is false.
24176
24177 The purpose of these operands is to distinguish between cases
24178 where the endian-ness of the values is important (for example
24179 when they are added together), and cases where the endian-ness
24180 is irrelevant, but the order of register operations is important.
24181 For example when loading a value from memory into a register
24182 pair, the endian-ness does not matter. Provided that the value
24183 from the lower memory address is put into the lower numbered
24184 register, and the value from the higher address is put into the
24185 higher numbered register, the load will work regardless of whether
24186 the value being loaded is big-wordian or little-wordian. The
24187 order of the two register loads can matter however, if the address
24188 of the memory location is actually held in one of the registers
24189 being overwritten by the load.
24190
24191 The 'Q' and 'R' constraints are also available for 64-bit
24192 constants. */
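/* Illustrative example (register pair assumed): for a DImode value held
   in r0/r1 on a target where WORDS_BIG_ENDIAN is false, %Q prints r0,
   %R prints r1 and %H prints r1; with WORDS_BIG_ENDIAN true, %Q and %H
   both print r1 while %R prints r0.  */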
24193 case 'Q':
24194 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24195 {
24196 rtx part = gen_lowpart (SImode, x);
24197 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24198 return;
24199 }
24200
24201 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24202 {
24203 output_operand_lossage ("invalid operand for code '%c'", code);
24204 return;
24205 }
24206
24207 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24208 return;
24209
24210 case 'R':
24211 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24212 {
24213 machine_mode mode = GET_MODE (x);
24214 rtx part;
24215
24216 if (mode == VOIDmode)
24217 mode = DImode;
24218 part = gen_highpart_mode (SImode, mode, x);
24219 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24220 return;
24221 }
24222
24223 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24224 {
24225 output_operand_lossage ("invalid operand for code '%c'", code);
24226 return;
24227 }
24228
24229 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24230 return;
24231
24232 case 'H':
24233 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24234 {
24235 output_operand_lossage ("invalid operand for code '%c'", code);
24236 return;
24237 }
24238
24239 asm_fprintf (stream, "%r", REGNO (x) + 1);
24240 return;
24241
24242 case 'J':
24243 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24244 {
24245 output_operand_lossage ("invalid operand for code '%c'", code);
24246 return;
24247 }
24248
24249 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24250 return;
24251
24252 case 'K':
24253 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24254 {
24255 output_operand_lossage ("invalid operand for code '%c'", code);
24256 return;
24257 }
24258
24259 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24260 return;
24261
24262 case 'm':
24263 asm_fprintf (stream, "%r",
24264 REG_P (XEXP (x, 0))
24265 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24266 return;
24267
24268 case 'M':
24269 asm_fprintf (stream, "{%r-%r}",
24270 REGNO (x),
24271 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24272 return;
24273
24274 /* Like 'M', but writing doubleword vector registers, for use by Neon
24275 insns. */
24276 case 'h':
24277 {
24278 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24279 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24280 if (numregs == 1)
24281 asm_fprintf (stream, "{d%d}", regno);
24282 else
24283 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24284 }
24285 return;
24286
24287 case 'd':
24288 /* CONST_TRUE_RTX means always -- that's the default. */
24289 if (x == const_true_rtx)
24290 return;
24291
24292 if (!COMPARISON_P (x))
24293 {
24294 output_operand_lossage ("invalid operand for code '%c'", code);
24295 return;
24296 }
24297
24298 fputs (arm_condition_codes[get_arm_condition_code (x)],
24299 stream);
24300 return;
24301
24302 case 'D':
24303 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24304 want to do that. */
24305 if (x == const_true_rtx)
24306 {
24307 output_operand_lossage ("instruction never executed");
24308 return;
24309 }
24310 if (!COMPARISON_P (x))
24311 {
24312 output_operand_lossage ("invalid operand for code '%c'", code);
24313 return;
24314 }
24315
24316 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24317 (get_arm_condition_code (x))],
24318 stream);
24319 return;
24320
24321 case 'V':
24322 {
24323 /* Output the LSB (shift) and width for a bitmask instruction
24324 based on a literal mask. The LSB is printed first,
24325 followed by the width.
24326
24327 E.g. for 0b1...1110001, the result is #1, #3. */
24328 if (!CONST_INT_P (x))
24329 {
24330 output_operand_lossage ("invalid operand for code '%c'", code);
24331 return;
24332 }
24333
24334 unsigned HOST_WIDE_INT val
24335 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24336 int lsb = exact_log2 (val & -val);
24337 asm_fprintf (stream, "#%d, #%d", lsb,
24338 (exact_log2 (val + (val & -val)) - lsb));
24339 }
24340 return;
24341
24342 case 's':
24343 case 'W':
24344 case 'X':
24345 case 'Y':
24346 case 'Z':
24347 /* Former Maverick support, removed after GCC-4.7. */
24348 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24349 return;
24350
24351 case 'U':
24352 if (!REG_P (x)
24353 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24354 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24355 /* Bad value for wCG register number. */
24356 {
24357 output_operand_lossage ("invalid operand for code '%c'", code);
24358 return;
24359 }
24360
24361 else
24362 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24363 return;
24364
24365 /* Print an iWMMXt control register name. */
24366 case 'w':
24367 if (!CONST_INT_P (x)
24368 || INTVAL (x) < 0
24369 || INTVAL (x) >= 16)
24370 /* Bad value for wC register number. */
24371 {
24372 output_operand_lossage ("invalid operand for code '%c'", code);
24373 return;
24374 }
24375
24376 else
24377 {
24378 static const char * wc_reg_names [16] =
24379 {
24380 "wCID", "wCon", "wCSSF", "wCASF",
24381 "wC4", "wC5", "wC6", "wC7",
24382 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24383 "wC12", "wC13", "wC14", "wC15"
24384 };
24385
24386 fputs (wc_reg_names [INTVAL (x)], stream);
24387 }
24388 return;
24389
24390 /* Print the high single-precision register of a VFP double-precision
24391 register. */
24392 case 'p':
24393 {
24394 machine_mode mode = GET_MODE (x);
24395 int regno;
24396
24397 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24398 {
24399 output_operand_lossage ("invalid operand for code '%c'", code);
24400 return;
24401 }
24402
24403 regno = REGNO (x);
24404 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24405 {
24406 output_operand_lossage ("invalid operand for code '%c'", code);
24407 return;
24408 }
24409
24410 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24411 }
24412 return;
24413
24414 /* Print a VFP/Neon double precision or quad precision register name. */
24415 case 'P':
24416 case 'q':
24417 {
24418 machine_mode mode = GET_MODE (x);
24419 int is_quad = (code == 'q');
24420 int regno;
24421
24422 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24423 {
24424 output_operand_lossage ("invalid operand for code '%c'", code);
24425 return;
24426 }
24427
24428 if (!REG_P (x)
24429 || !IS_VFP_REGNUM (REGNO (x)))
24430 {
24431 output_operand_lossage ("invalid operand for code '%c'", code);
24432 return;
24433 }
24434
24435 regno = REGNO (x);
24436 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24437 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24438 {
24439 output_operand_lossage ("invalid operand for code '%c'", code);
24440 return;
24441 }
24442
24443 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24444 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24445 }
24446 return;
24447
24448 /* These two codes print the low/high doubleword register of a Neon quad
24449 register, respectively. For pair-structure types, can also print
24450 low/high quadword registers. */
24451 case 'e':
24452 case 'f':
24453 {
24454 machine_mode mode = GET_MODE (x);
24455 int regno;
24456
24457 if ((GET_MODE_SIZE (mode) != 16
24458 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24459 {
24460 output_operand_lossage ("invalid operand for code '%c'", code);
24461 return;
24462 }
24463
24464 regno = REGNO (x);
24465 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24466 {
24467 output_operand_lossage ("invalid operand for code '%c'", code);
24468 return;
24469 }
24470
24471 if (GET_MODE_SIZE (mode) == 16)
24472 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24473 + (code == 'f' ? 1 : 0));
24474 else
24475 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24476 + (code == 'f' ? 1 : 0));
24477 }
24478 return;
24479
24480 /* Print a VFPv3 floating-point constant, represented as an integer
24481 index. */
24482 case 'G':
24483 {
24484 int index = vfp3_const_double_index (x);
24485 gcc_assert (index != -1);
24486 fprintf (stream, "%d", index);
24487 }
24488 return;
24489
24490 /* Print bits representing opcode features for Neon.
24491
24492 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24493 and polynomials as unsigned.
24494
24495 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24496
24497 Bit 2 is 1 for rounding functions, 0 otherwise. */
24498
24499 /* Identify the type as 's', 'u', 'p' or 'f'. */
24500 case 'T':
24501 {
24502 HOST_WIDE_INT bits = INTVAL (x);
24503 fputc ("uspf"[bits & 3], stream);
24504 }
24505 return;
24506
24507 /* Likewise, but signed and unsigned integers are both 'i'. */
24508 case 'F':
24509 {
24510 HOST_WIDE_INT bits = INTVAL (x);
24511 fputc ("iipf"[bits & 3], stream);
24512 }
24513 return;
24514
24515 /* As for 'T', but emit 'u' instead of 'p'. */
24516 case 't':
24517 {
24518 HOST_WIDE_INT bits = INTVAL (x);
24519 fputc ("usuf"[bits & 3], stream);
24520 }
24521 return;
24522
24523 /* Bit 2: rounding (vs none). */
24524 case 'O':
24525 {
24526 HOST_WIDE_INT bits = INTVAL (x);
24527 fputs ((bits & 4) != 0 ? "r" : "", stream);
24528 }
24529 return;
24530
24531 /* Memory operand for vld1/vst1 instruction. */
24532 case 'A':
24533 {
24534 rtx addr;
24535 bool postinc = false;
24536 rtx postinc_reg = NULL;
24537 unsigned align, memsize, align_bits;
24538
24539 gcc_assert (MEM_P (x));
24540 addr = XEXP (x, 0);
24541 if (GET_CODE (addr) == POST_INC)
24542 {
24543 postinc = true;
24544 addr = XEXP (addr, 0);
24545 }
24546 if (GET_CODE (addr) == POST_MODIFY)
24547 {
24548 postinc_reg = XEXP (XEXP (addr, 1), 1);
24549 addr = XEXP (addr, 0);
24550 }
24551 asm_fprintf (stream, "[%r", REGNO (addr));
24552
24553 /* We know the alignment of this access, so we can emit a hint in the
24554 instruction (for some alignments) as an aid to the memory subsystem
24555 of the target. */
24556 align = MEM_ALIGN (x) >> 3;
24557 memsize = MEM_SIZE (x);
24558
24559 /* Only certain alignment specifiers are supported by the hardware. */
24560 if (memsize == 32 && (align % 32) == 0)
24561 align_bits = 256;
24562 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24563 align_bits = 128;
24564 else if (memsize >= 8 && (align % 8) == 0)
24565 align_bits = 64;
24566 else
24567 align_bits = 0;
24568
24569 if (align_bits != 0)
24570 asm_fprintf (stream, ":%d", align_bits);
24571
24572 asm_fprintf (stream, "]");
24573
24574 if (postinc)
24575 fputs ("!", stream);
24576 if (postinc_reg)
24577 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24578 }
24579 return;
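
/* Illustrative output for the 'A' code (base register assumed): a 16-byte
   access known to be 16-byte aligned prints as "[r0:128]"; with only
   8-byte alignment it prints "[r0:64]"; a post-increment form appends
   '!', giving "[r0:128]!".  */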
24580
24581 /* Print a memory operand with the "Ux" or "Uj" constraint. Depending on
24582 the rtx_code, the memory operand is output in one of the following forms.
24583 1. [Rn], #+/-<imm>
24584 2. [Rn, #+/-<imm>]!
24585 3. [Rn, #+/-<imm>]
24586 4. [Rn]. */
24587 case 'E':
24588 {
24589 rtx addr;
24590 rtx postinc_reg = NULL;
24591 unsigned inc_val = 0;
24592 enum rtx_code code;
24593
24594 gcc_assert (MEM_P (x));
24595 addr = XEXP (x, 0);
24596 code = GET_CODE (addr);
24597 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24598 || code == PRE_DEC)
24599 {
24600 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24601 inc_val = GET_MODE_SIZE (GET_MODE (x));
24602 if (code == POST_INC || code == POST_DEC)
24603 asm_fprintf (stream, "], #%s%d", (code == POST_INC)
24604 ? "" : "-", inc_val);
24605 else
24606 asm_fprintf (stream, ", #%s%d]!", (code == PRE_INC)
24607 ? "" : "-", inc_val);
24608 }
24609 else if (code == POST_MODIFY || code == PRE_MODIFY)
24610 {
24611 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24612 postinc_reg = XEXP (XEXP (addr, 1), 1);
24613 if (postinc_reg && CONST_INT_P (postinc_reg))
24614 {
24615 if (code == POST_MODIFY)
24616 asm_fprintf (stream, "], #%wd", INTVAL (postinc_reg));
24617 else
24618 asm_fprintf (stream, ", #%wd]!", INTVAL (postinc_reg));
24619 }
24620 }
24621 else if (code == PLUS)
24622 {
24623 rtx base = XEXP (addr, 0);
24624 rtx index = XEXP (addr, 1);
24625
24626 gcc_assert (REG_P (base) && CONST_INT_P (index));
24627
24628 HOST_WIDE_INT offset = INTVAL (index);
24629 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24630 }
24631 else
24632 {
24633 gcc_assert (REG_P (addr));
24634 asm_fprintf (stream, "[%r]", REGNO (addr));
24635 }
24636 }
24637 return;
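
/* Illustrative output for the forms listed above, assuming a 16-byte
   vector mode and base register r2: POST_INC prints "[r2], #16", PRE_DEC
   prints "[r2, #-16]!", PLUS with offset 8 prints "[r2, #8]" and a plain
   REG prints "[r2]".  */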
24638
24639 case 'C':
24640 {
24641 rtx addr;
24642
24643 gcc_assert (MEM_P (x));
24644 addr = XEXP (x, 0);
24645 gcc_assert (REG_P (addr));
24646 asm_fprintf (stream, "[%r]", REGNO (addr));
24647 }
24648 return;
24649
24650 /* Translate an S register number into a D register number and element index. */
24651 case 'y':
24652 {
24653 machine_mode mode = GET_MODE (x);
24654 int regno;
24655
24656 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24657 {
24658 output_operand_lossage ("invalid operand for code '%c'", code);
24659 return;
24660 }
24661
24662 regno = REGNO (x);
24663 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24664 {
24665 output_operand_lossage ("invalid operand for code '%c'", code);
24666 return;
24667 }
24668
24669 regno = regno - FIRST_VFP_REGNUM;
24670 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24671 }
24672 return;
24673
24674 case 'v':
24675 gcc_assert (CONST_DOUBLE_P (x));
24676 int result;
24677 result = vfp3_const_double_for_fract_bits (x);
24678 if (result == 0)
24679 result = vfp3_const_double_for_bits (x);
24680 fprintf (stream, "#%d", result);
24681 return;
24682
24683 /* Register specifier for vld1.16/vst1.16. Translate the S register
24684 number into a D register number and element index. */
24685 case 'z':
24686 {
24687 machine_mode mode = GET_MODE (x);
24688 int regno;
24689
24690 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24691 {
24692 output_operand_lossage ("invalid operand for code '%c'", code);
24693 return;
24694 }
24695
24696 regno = REGNO (x);
24697 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24698 {
24699 output_operand_lossage ("invalid operand for code '%c'", code);
24700 return;
24701 }
24702
24703 regno = regno - FIRST_VFP_REGNUM;
24704 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
24705 }
24706 return;
24707
24708 default:
24709 if (x == 0)
24710 {
24711 output_operand_lossage ("missing operand");
24712 return;
24713 }
24714
24715 switch (GET_CODE (x))
24716 {
24717 case REG:
24718 asm_fprintf (stream, "%r", REGNO (x));
24719 break;
24720
24721 case MEM:
24722 output_address (GET_MODE (x), XEXP (x, 0));
24723 break;
24724
24725 case CONST_DOUBLE:
24726 {
24727 char fpstr[20];
24728 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24729 sizeof (fpstr), 0, 1);
24730 fprintf (stream, "#%s", fpstr);
24731 }
24732 break;
24733
24734 default:
24735 gcc_assert (GET_CODE (x) != NEG);
24736 fputc ('#', stream);
24737 if (GET_CODE (x) == HIGH)
24738 {
24739 fputs (":lower16:", stream);
24740 x = XEXP (x, 0);
24741 }
24742
24743 output_addr_const (stream, x);
24744 break;
24745 }
24746 }
24747 }
24748 \f
24749 /* Target hook for printing a memory address. */
24750 static void
24751 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24752 {
24753 if (TARGET_32BIT)
24754 {
24755 int is_minus = GET_CODE (x) == MINUS;
24756
24757 if (REG_P (x))
24758 asm_fprintf (stream, "[%r]", REGNO (x));
24759 else if (GET_CODE (x) == PLUS || is_minus)
24760 {
24761 rtx base = XEXP (x, 0);
24762 rtx index = XEXP (x, 1);
24763 HOST_WIDE_INT offset = 0;
24764 if (!REG_P (base)
24765 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24766 {
24767 /* Ensure that BASE is a register. */
24768 /* (one of them must be). */
24769 /* Also ensure the SP is not used as an index register. */
24770 std::swap (base, index);
24771 }
24772 switch (GET_CODE (index))
24773 {
24774 case CONST_INT:
24775 offset = INTVAL (index);
24776 if (is_minus)
24777 offset = -offset;
24778 asm_fprintf (stream, "[%r, #%wd]",
24779 REGNO (base), offset);
24780 break;
24781
24782 case REG:
24783 asm_fprintf (stream, "[%r, %s%r]",
24784 REGNO (base), is_minus ? "-" : "",
24785 REGNO (index));
24786 break;
24787
24788 case MULT:
24789 case ASHIFTRT:
24790 case LSHIFTRT:
24791 case ASHIFT:
24792 case ROTATERT:
24793 {
24794 asm_fprintf (stream, "[%r, %s%r",
24795 REGNO (base), is_minus ? "-" : "",
24796 REGNO (XEXP (index, 0)));
24797 arm_print_operand (stream, index, 'S');
24798 fputs ("]", stream);
24799 break;
24800 }
24801
24802 default:
24803 gcc_unreachable ();
24804 }
24805 }
24806 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24807 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24808 {
24809 gcc_assert (REG_P (XEXP (x, 0)));
24810
24811 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24812 asm_fprintf (stream, "[%r, #%s%d]!",
24813 REGNO (XEXP (x, 0)),
24814 GET_CODE (x) == PRE_DEC ? "-" : "",
24815 GET_MODE_SIZE (mode));
24816 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24817 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x, 0)));
24818 else
24819 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24820 GET_CODE (x) == POST_DEC ? "-" : "",
24821 GET_MODE_SIZE (mode));
24822 }
24823 else if (GET_CODE (x) == PRE_MODIFY)
24824 {
24825 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24826 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24827 asm_fprintf (stream, "#%wd]!",
24828 INTVAL (XEXP (XEXP (x, 1), 1)));
24829 else
24830 asm_fprintf (stream, "%r]!",
24831 REGNO (XEXP (XEXP (x, 1), 1)));
24832 }
24833 else if (GET_CODE (x) == POST_MODIFY)
24834 {
24835 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24836 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24837 asm_fprintf (stream, "#%wd",
24838 INTVAL (XEXP (XEXP (x, 1), 1)));
24839 else
24840 asm_fprintf (stream, "%r",
24841 REGNO (XEXP (XEXP (x, 1), 1)));
24842 }
24843 else output_addr_const (stream, x);
24844 }
24845 else
24846 {
24847 if (REG_P (x))
24848 asm_fprintf (stream, "[%r]", REGNO (x));
24849 else if (GET_CODE (x) == POST_INC)
24850 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24851 else if (GET_CODE (x) == PLUS)
24852 {
24853 gcc_assert (REG_P (XEXP (x, 0)));
24854 if (CONST_INT_P (XEXP (x, 1)))
24855 asm_fprintf (stream, "[%r, #%wd]",
24856 REGNO (XEXP (x, 0)),
24857 INTVAL (XEXP (x, 1)));
24858 else
24859 asm_fprintf (stream, "[%r, %r]",
24860 REGNO (XEXP (x, 0)),
24861 REGNO (XEXP (x, 1)));
24862 }
24863 else
24864 output_addr_const (stream, x);
24865 }
24866 }
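
/* Illustrative 32-bit address output (registers and offsets assumed):
   (plus r0 (const_int 8)) prints "[r0, #8]", (minus r0 r1) prints
   "[r0, -r1]", (pre_dec r3) in SImode prints "[r3, #-4]!" and
   (post_inc r3) prints "[r3], #4".  */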
24867 \f
24868 /* Target hook for indicating whether a punctuation character for
24869 TARGET_PRINT_OPERAND is valid. */
24870 static bool
24871 arm_print_operand_punct_valid_p (unsigned char code)
24872 {
24873 return (code == '@' || code == '|' || code == '.'
24874 || code == '(' || code == ')' || code == '#'
24875 || (TARGET_32BIT && (code == '?'))
24876 || (TARGET_THUMB2 && (code == '!'))
24877 || (TARGET_THUMB && (code == '_')));
24878 }
24879 \f
24880 /* Target hook for assembling integer objects. The ARM version needs to
24881 handle word-sized values specially. */
24882 static bool
24883 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24884 {
24885 machine_mode mode;
24886
24887 if (size == UNITS_PER_WORD && aligned_p)
24888 {
24889 fputs ("\t.word\t", asm_out_file);
24890 output_addr_const (asm_out_file, x);
24891
24892 /* Mark symbols as position independent. We only do this in the
24893 .text segment, not in the .data segment. */
24894 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24895 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24896 {
24897 /* See legitimize_pic_address for an explanation of the
24898 TARGET_VXWORKS_RTP check. */
24899 /* References to weak symbols cannot be resolved locally:
24900 they may be overridden by a non-weak definition at link
24901 time. */
24902 if (!arm_pic_data_is_text_relative
24903 || (SYMBOL_REF_P (x)
24904 && (!SYMBOL_REF_LOCAL_P (x)
24905 || (SYMBOL_REF_DECL (x)
24906 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24907 || (SYMBOL_REF_FUNCTION_P (x)
24908 && !arm_fdpic_local_funcdesc_p (x)))))
24909 {
24910 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24911 fputs ("(GOTFUNCDESC)", asm_out_file);
24912 else
24913 fputs ("(GOT)", asm_out_file);
24914 }
24915 else
24916 {
24917 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24918 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24919 else
24920 {
24921 bool is_readonly;
24922
24923 if (!TARGET_FDPIC
24924 || arm_is_segment_info_known (x, &is_readonly))
24925 fputs ("(GOTOFF)", asm_out_file);
24926 else
24927 fputs ("(GOT)", asm_out_file);
24928 }
24929 }
24930 }
24931
24932 /* For FDPIC we also have to mark symbol for .data section. */
24933 if (TARGET_FDPIC
24934 && !making_const_table
24935 && SYMBOL_REF_P (x)
24936 && SYMBOL_REF_FUNCTION_P (x))
24937 fputs ("(FUNCDESC)", asm_out_file);
24938
24939 fputc ('\n', asm_out_file);
24940 return true;
24941 }
24942
24943 mode = GET_MODE (x);
24944
24945 if (arm_vector_mode_supported_p (mode))
24946 {
24947 int i, units;
24948
24949 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24950
24951 units = CONST_VECTOR_NUNITS (x);
24952 size = GET_MODE_UNIT_SIZE (mode);
24953
24954 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24955 for (i = 0; i < units; i++)
24956 {
24957 rtx elt = CONST_VECTOR_ELT (x, i);
24958 assemble_integer
24959 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24960 }
24961 else
24962 for (i = 0; i < units; i++)
24963 {
24964 rtx elt = CONST_VECTOR_ELT (x, i);
24965 assemble_real
24966 (*CONST_DOUBLE_REAL_VALUE (elt),
24967 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24968 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24969 }
24970
24971 return true;
24972 }
24973
24974 return default_assemble_integer (x, size, aligned_p);
24975 }
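
/* For illustration only (assumed assembler output, not taken from a build):
   with -fPIC, a word in the constant pool referring to a non-local symbol
   would be emitted roughly as

	.word	sym(GOT)

   while a reference to a local, text-relative symbol would use

	.word	sym(GOTOFF)

   The exact suffix depends on the FDPIC and locality checks above.  */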
24976
24977 static void
24978 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24979 {
24980 section *s;
24981
24982 if (!TARGET_AAPCS_BASED)
24983 {
24984 (is_ctor ?
24985 default_named_section_asm_out_constructor
24986 : default_named_section_asm_out_destructor) (symbol, priority);
24987 return;
24988 }
24989
24990 /* Put these in the .init_array section, using a special relocation. */
24991 if (priority != DEFAULT_INIT_PRIORITY)
24992 {
24993 char buf[18];
24994 sprintf (buf, "%s.%.5u",
24995 is_ctor ? ".init_array" : ".fini_array",
24996 priority);
24997 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24998 }
24999 else if (is_ctor)
25000 s = ctors_section;
25001 else
25002 s = dtors_section;
25003
25004 switch_to_section (s);
25005 assemble_align (POINTER_SIZE);
25006 fputs ("\t.word\t", asm_out_file);
25007 output_addr_const (asm_out_file, symbol);
25008 fputs ("(target1)\n", asm_out_file);
25009 }
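
/* For illustration only (assumed assembler output): on an AAPCS target a
   constructor with priority 101 would be placed roughly as

	.section	.init_array.00101
	.word	my_ctor(target1)

   where "my_ctor" is a hypothetical symbol.  The (target1) relocation lets
   the linker choose between an absolute and a PC-relative encoding.  */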
25010
25011 /* Add a function to the list of static constructors. */
25012
25013 static void
25014 arm_elf_asm_constructor (rtx symbol, int priority)
25015 {
25016 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25017 }
25018
25019 /* Add a function to the list of static destructors. */
25020
25021 static void
25022 arm_elf_asm_destructor (rtx symbol, int priority)
25023 {
25024 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25025 }
25026 \f
25027 /* A finite state machine takes care of noticing whether or not instructions
25028 can be conditionally executed, and thus decrease execution time and code
25029 size by deleting branch instructions. The fsm is controlled by
25030 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25031
25032 /* The states of the fsm controlling condition codes are:
25033 0: normal, do nothing special
25034 1: make ASM_OUTPUT_OPCODE not output this instruction
25035 2: make ASM_OUTPUT_OPCODE not output this instruction
25036 3: make instructions conditional
25037 4: make instructions conditional
25038
25039 State transitions (state->state by whom under condition):
25040 0 -> 1 final_prescan_insn if the `target' is a label
25041 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25042 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25043 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25044 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25045 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25046 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25047 (the target insn is arm_target_insn).
25048
25049 If the jump clobbers the conditions then we use states 2 and 4.
25050
25051 A similar thing can be done with conditional return insns.
25052
25053 XXX In case the `target' is an unconditional branch, this conditionalising
25054 of the instructions always reduces code size, but not always execution
25055 time. But then, I want to reduce the code size to somewhere near what
25056 /bin/cc produces. */
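
/* For illustration only (a hypothetical fragment, not actual compiler
   output): a branch skipping a couple of simple insns, e.g.

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
	mov	r2, #5
   .L1:

   can instead be emitted with the skipped insns conditionalised on the
   inverse condition, removing the branch entirely:

	cmp	r0, #0
	addne	r1, r1, #1
	movne	r2, #5  */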
25057
25058 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25059 instructions. When a COND_EXEC instruction is seen the subsequent
25060 instructions are scanned so that multiple conditional instructions can be
25061 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25062 specify the length and true/false mask for the IT block. These will be
25063 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
25064
25065 /* Returns the index of the ARM condition code string in
25066 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25067 COMPARISON should be an rtx like `(eq (...) (...))'. */
25068
25069 enum arm_cond_code
25070 maybe_get_arm_condition_code (rtx comparison)
25071 {
25072 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25073 enum arm_cond_code code;
25074 enum rtx_code comp_code = GET_CODE (comparison);
25075
25076 if (GET_MODE_CLASS (mode) != MODE_CC)
25077 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25078 XEXP (comparison, 1));
25079
25080 switch (mode)
25081 {
25082 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25083 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25084 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25085 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25086 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25087 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25088 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25089 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25090 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25091 case E_CC_DLTUmode: code = ARM_CC;
25092
25093 dominance:
25094 if (comp_code == EQ)
25095 return ARM_INVERSE_CONDITION_CODE (code);
25096 if (comp_code == NE)
25097 return code;
25098 return ARM_NV;
25099
25100 case E_CC_NZmode:
25101 switch (comp_code)
25102 {
25103 case NE: return ARM_NE;
25104 case EQ: return ARM_EQ;
25105 case GE: return ARM_PL;
25106 case LT: return ARM_MI;
25107 default: return ARM_NV;
25108 }
25109
25110 case E_CC_Zmode:
25111 switch (comp_code)
25112 {
25113 case NE: return ARM_NE;
25114 case EQ: return ARM_EQ;
25115 default: return ARM_NV;
25116 }
25117
25118 case E_CC_Nmode:
25119 switch (comp_code)
25120 {
25121 case NE: return ARM_MI;
25122 case EQ: return ARM_PL;
25123 default: return ARM_NV;
25124 }
25125
25126 case E_CCFPEmode:
25127 case E_CCFPmode:
25128 /* We can handle all cases except UNEQ and LTGT. */
25129 switch (comp_code)
25130 {
25131 case GE: return ARM_GE;
25132 case GT: return ARM_GT;
25133 case LE: return ARM_LS;
25134 case LT: return ARM_MI;
25135 case NE: return ARM_NE;
25136 case EQ: return ARM_EQ;
25137 case ORDERED: return ARM_VC;
25138 case UNORDERED: return ARM_VS;
25139 case UNLT: return ARM_LT;
25140 case UNLE: return ARM_LE;
25141 case UNGT: return ARM_HI;
25142 case UNGE: return ARM_PL;
25143 /* UNEQ and LTGT do not have a representation. */
25144 case UNEQ: /* Fall through. */
25145 case LTGT: /* Fall through. */
25146 default: return ARM_NV;
25147 }
25148
25149 case E_CC_SWPmode:
25150 switch (comp_code)
25151 {
25152 case NE: return ARM_NE;
25153 case EQ: return ARM_EQ;
25154 case GE: return ARM_LE;
25155 case GT: return ARM_LT;
25156 case LE: return ARM_GE;
25157 case LT: return ARM_GT;
25158 case GEU: return ARM_LS;
25159 case GTU: return ARM_CC;
25160 case LEU: return ARM_CS;
25161 case LTU: return ARM_HI;
25162 default: return ARM_NV;
25163 }
25164
25165 case E_CC_Cmode:
25166 switch (comp_code)
25167 {
25168 case LTU: return ARM_CS;
25169 case GEU: return ARM_CC;
25170 default: return ARM_NV;
25171 }
25172
25173 case E_CC_NVmode:
25174 switch (comp_code)
25175 {
25176 case GE: return ARM_GE;
25177 case LT: return ARM_LT;
25178 default: return ARM_NV;
25179 }
25180
25181 case E_CC_Bmode:
25182 switch (comp_code)
25183 {
25184 case GEU: return ARM_CS;
25185 case LTU: return ARM_CC;
25186 default: return ARM_NV;
25187 }
25188
25189 case E_CC_Vmode:
25190 switch (comp_code)
25191 {
25192 case NE: return ARM_VS;
25193 case EQ: return ARM_VC;
25194 default: return ARM_NV;
25195 }
25196
25197 case E_CC_ADCmode:
25198 switch (comp_code)
25199 {
25200 case GEU: return ARM_CS;
25201 case LTU: return ARM_CC;
25202 default: return ARM_NV;
25203 }
25204
25205 case E_CCmode:
25206 case E_CC_RSBmode:
25207 switch (comp_code)
25208 {
25209 case NE: return ARM_NE;
25210 case EQ: return ARM_EQ;
25211 case GE: return ARM_GE;
25212 case GT: return ARM_GT;
25213 case LE: return ARM_LE;
25214 case LT: return ARM_LT;
25215 case GEU: return ARM_CS;
25216 case GTU: return ARM_HI;
25217 case LEU: return ARM_LS;
25218 case LTU: return ARM_CC;
25219 default: return ARM_NV;
25220 }
25221
25222 default: gcc_unreachable ();
25223 }
25224 }
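
/* For example (illustrative): a comparison rtx such as
   (eq (reg:CC_Z cc) (const_int 0)) yields ARM_EQ, whereas in CC_SWPmode,
   where the operands of the original comparison were swapped, (gt ...)
   maps to ARM_LT, and so on.  */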
25225
25226 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25227 static enum arm_cond_code
25228 get_arm_condition_code (rtx comparison)
25229 {
25230 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25231 gcc_assert (code != ARM_NV);
25232 return code;
25233 }
25234
25235 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25236 code registers when not targeting Thumb1. The VFP condition register
25237 only exists when generating hard-float code. */
25238 static bool
25239 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25240 {
25241 if (!TARGET_32BIT)
25242 return false;
25243
25244 *p1 = CC_REGNUM;
25245 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25246 return true;
25247 }
25248
25249 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25250 instructions. */
25251 void
25252 thumb2_final_prescan_insn (rtx_insn *insn)
25253 {
25254 rtx_insn *first_insn = insn;
25255 rtx body = PATTERN (insn);
25256 rtx predicate;
25257 enum arm_cond_code code;
25258 int n;
25259 int mask;
25260 int max;
25261
25262 /* max_insns_skipped in the tune was already taken into account in the
25263 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
25264 just emit the IT blocks as best we can. It does not make sense to split
25265 the IT blocks. */
25266 max = MAX_INSN_PER_IT_BLOCK;
25267
25268 /* Remove the previous insn from the count of insns to be output. */
25269 if (arm_condexec_count)
25270 arm_condexec_count--;
25271
25272 /* Nothing to do if we are already inside a conditional block. */
25273 if (arm_condexec_count)
25274 return;
25275
25276 if (GET_CODE (body) != COND_EXEC)
25277 return;
25278
25279 /* Conditional jumps are implemented directly. */
25280 if (JUMP_P (insn))
25281 return;
25282
25283 predicate = COND_EXEC_TEST (body);
25284 arm_current_cc = get_arm_condition_code (predicate);
25285
25286 n = get_attr_ce_count (insn);
25287 arm_condexec_count = 1;
25288 arm_condexec_mask = (1 << n) - 1;
25289 arm_condexec_masklen = n;
25290 /* See if subsequent instructions can be combined into the same block. */
25291 for (;;)
25292 {
25293 insn = next_nonnote_insn (insn);
25294
25295 /* Jumping into the middle of an IT block is illegal, so a label or
25296 barrier terminates the block. */
25297 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25298 break;
25299
25300 body = PATTERN (insn);
25301 /* USE and CLOBBER aren't really insns, so just skip them. */
25302 if (GET_CODE (body) == USE
25303 || GET_CODE (body) == CLOBBER)
25304 continue;
25305
25306 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25307 if (GET_CODE (body) != COND_EXEC)
25308 break;
25309 /* Maximum number of conditionally executed instructions in a block. */
25310 n = get_attr_ce_count (insn);
25311 if (arm_condexec_masklen + n > max)
25312 break;
25313
25314 predicate = COND_EXEC_TEST (body);
25315 code = get_arm_condition_code (predicate);
25316 mask = (1 << n) - 1;
25317 if (arm_current_cc == code)
25318 arm_condexec_mask |= (mask << arm_condexec_masklen);
25319 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25320 break;
25321
25322 arm_condexec_count++;
25323 arm_condexec_masklen += n;
25324
25325 /* A jump must be the last instruction in a conditional block. */
25326 if (JUMP_P (insn))
25327 break;
25328 }
25329 /* Restore recog_data (getting the attributes of other insns can
25330 destroy this array, but final.cc assumes that it remains intact
25331 across this call). */
25332 extract_constrain_insn_cached (first_insn);
25333 }
25334
25335 void
25336 arm_final_prescan_insn (rtx_insn *insn)
25337 {
25338 /* BODY will hold the body of INSN. */
25339 rtx body = PATTERN (insn);
25340
25341 /* This will be 1 if trying to repeat the trick, and things need to be
25342 reversed if it appears to fail. */
25343 int reverse = 0;
25344
25345 /* If we start with a return insn, we only succeed if we find another one. */
25346 int seeking_return = 0;
25347 enum rtx_code return_code = UNKNOWN;
25348
25349 /* START_INSN will hold the insn from where we start looking. This is the
25350 first insn after the following code_label if REVERSE is true. */
25351 rtx_insn *start_insn = insn;
25352
25353 /* If in state 4, check if the target branch is reached, in order to
25354 change back to state 0. */
25355 if (arm_ccfsm_state == 4)
25356 {
25357 if (insn == arm_target_insn)
25358 {
25359 arm_target_insn = NULL;
25360 arm_ccfsm_state = 0;
25361 }
25362 return;
25363 }
25364
25365 /* If in state 3, it is possible to repeat the trick, if this insn is an
25366 unconditional branch to a label, and immediately following this branch
25367 is the previous target label which is only used once, and the label this
25368 branch jumps to is not too far off. */
25369 if (arm_ccfsm_state == 3)
25370 {
25371 if (simplejump_p (insn))
25372 {
25373 start_insn = next_nonnote_insn (start_insn);
25374 if (BARRIER_P (start_insn))
25375 {
25376 /* XXX Isn't this always a barrier? */
25377 start_insn = next_nonnote_insn (start_insn);
25378 }
25379 if (LABEL_P (start_insn)
25380 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25381 && LABEL_NUSES (start_insn) == 1)
25382 reverse = TRUE;
25383 else
25384 return;
25385 }
25386 else if (ANY_RETURN_P (body))
25387 {
25388 start_insn = next_nonnote_insn (start_insn);
25389 if (BARRIER_P (start_insn))
25390 start_insn = next_nonnote_insn (start_insn);
25391 if (LABEL_P (start_insn)
25392 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25393 && LABEL_NUSES (start_insn) == 1)
25394 {
25395 reverse = TRUE;
25396 seeking_return = 1;
25397 return_code = GET_CODE (body);
25398 }
25399 else
25400 return;
25401 }
25402 else
25403 return;
25404 }
25405
25406 gcc_assert (!arm_ccfsm_state || reverse);
25407 if (!JUMP_P (insn))
25408 return;
25409
25410 /* This jump might be paralleled with a clobber of the condition codes;
25411 the jump should always come first. */
25412 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25413 body = XVECEXP (body, 0, 0);
25414
25415 if (reverse
25416 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25417 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25418 {
25419 int insns_skipped;
25420 int fail = FALSE, succeed = FALSE;
25421 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25422 int then_not_else = TRUE;
25423 rtx_insn *this_insn = start_insn;
25424 rtx label = 0;
25425
25426 /* Register the insn jumped to. */
25427 if (reverse)
25428 {
25429 if (!seeking_return)
25430 label = XEXP (SET_SRC (body), 0);
25431 }
25432 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25433 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25434 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25435 {
25436 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25437 then_not_else = FALSE;
25438 }
25439 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25440 {
25441 seeking_return = 1;
25442 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25443 }
25444 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25445 {
25446 seeking_return = 1;
25447 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25448 then_not_else = FALSE;
25449 }
25450 else
25451 gcc_unreachable ();
25452
25453 /* See how many insns this branch skips, and what kind of insns. If all
25454 insns are okay, and the label or unconditional branch to the same
25455 label is not too far away, succeed. */
25456 for (insns_skipped = 0;
25457 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25458 {
25459 rtx scanbody;
25460
25461 this_insn = next_nonnote_insn (this_insn);
25462 if (!this_insn)
25463 break;
25464
25465 switch (GET_CODE (this_insn))
25466 {
25467 case CODE_LABEL:
25468 /* Succeed if it is the target label, otherwise fail since
25469 control falls in from somewhere else. */
25470 if (this_insn == label)
25471 {
25472 arm_ccfsm_state = 1;
25473 succeed = TRUE;
25474 }
25475 else
25476 fail = TRUE;
25477 break;
25478
25479 case BARRIER:
25480 /* Succeed if the following insn is the target label.
25481 Otherwise fail.
25482 If return insns are used then the last insn in a function
25483 will be a barrier. */
25484 this_insn = next_nonnote_insn (this_insn);
25485 if (this_insn && this_insn == label)
25486 {
25487 arm_ccfsm_state = 1;
25488 succeed = TRUE;
25489 }
25490 else
25491 fail = TRUE;
25492 break;
25493
25494 case CALL_INSN:
25495 /* The AAPCS says that conditional calls should not be
25496 used since they make interworking inefficient (the
25497 linker can't transform BL<cond> into BLX). That's
25498 only a problem if the machine has BLX. */
25499 if (arm_arch5t)
25500 {
25501 fail = TRUE;
25502 break;
25503 }
25504
25505 /* Succeed if the following insn is the target label, or
25506 if the following two insns are a barrier and the
25507 target label. */
25508 this_insn = next_nonnote_insn (this_insn);
25509 if (this_insn && BARRIER_P (this_insn))
25510 this_insn = next_nonnote_insn (this_insn);
25511
25512 if (this_insn && this_insn == label
25513 && insns_skipped < max_insns_skipped)
25514 {
25515 arm_ccfsm_state = 1;
25516 succeed = TRUE;
25517 }
25518 else
25519 fail = TRUE;
25520 break;
25521
25522 case JUMP_INSN:
25523 /* If this is an unconditional branch to the same label, succeed.
25524 If it is to another label, do nothing. If it is conditional,
25525 fail. */
25526 /* XXX Probably, the tests for SET and the PC are
25527 unnecessary. */
25528
25529 scanbody = PATTERN (this_insn);
25530 if (GET_CODE (scanbody) == SET
25531 && GET_CODE (SET_DEST (scanbody)) == PC)
25532 {
25533 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25534 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25535 {
25536 arm_ccfsm_state = 2;
25537 succeed = TRUE;
25538 }
25539 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25540 fail = TRUE;
25541 }
25542 /* Fail if a conditional return is undesirable (e.g. on a
25543 StrongARM), but still allow this if optimizing for size. */
25544 else if (GET_CODE (scanbody) == return_code
25545 && !use_return_insn (TRUE, NULL)
25546 && !optimize_size)
25547 fail = TRUE;
25548 else if (GET_CODE (scanbody) == return_code)
25549 {
25550 arm_ccfsm_state = 2;
25551 succeed = TRUE;
25552 }
25553 else if (GET_CODE (scanbody) == PARALLEL)
25554 {
25555 switch (get_attr_conds (this_insn))
25556 {
25557 case CONDS_NOCOND:
25558 break;
25559 default:
25560 fail = TRUE;
25561 break;
25562 }
25563 }
25564 else
25565 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25566
25567 break;
25568
25569 case INSN:
25570 /* Instructions using or affecting the condition codes make it
25571 fail. */
25572 scanbody = PATTERN (this_insn);
25573 if (!(GET_CODE (scanbody) == SET
25574 || GET_CODE (scanbody) == PARALLEL)
25575 || get_attr_conds (this_insn) != CONDS_NOCOND)
25576 fail = TRUE;
25577 break;
25578
25579 default:
25580 break;
25581 }
25582 }
25583 if (succeed)
25584 {
25585 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25586 arm_target_label = CODE_LABEL_NUMBER (label);
25587 else
25588 {
25589 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25590
25591 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25592 {
25593 this_insn = next_nonnote_insn (this_insn);
25594 gcc_assert (!this_insn
25595 || (!BARRIER_P (this_insn)
25596 && !LABEL_P (this_insn)));
25597 }
25598 if (!this_insn)
25599 {
25600 /* Oh dear! We ran off the end; give up. */
25601 extract_constrain_insn_cached (insn);
25602 arm_ccfsm_state = 0;
25603 arm_target_insn = NULL;
25604 return;
25605 }
25606 arm_target_insn = this_insn;
25607 }
25608
25609 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25610 what it was. */
25611 if (!reverse)
25612 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25613
25614 if (reverse || then_not_else)
25615 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25616 }
25617
25618 /* Restore recog_data (getting the attributes of other insns can
25619 destroy this array, but final.cc assumes that it remains intact
25620 across this call). */
25621 extract_constrain_insn_cached (insn);
25622 }
25623 }
25624
25625 /* Output IT instructions. */
25626 void
25627 thumb2_asm_output_opcode (FILE * stream)
25628 {
25629 char buff[5];
25630 int n;
25631
25632 if (arm_condexec_mask)
25633 {
25634 for (n = 0; n < arm_condexec_masklen; n++)
25635 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25636 buff[n] = 0;
25637 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25638 arm_condition_codes[arm_current_cc]);
25639 arm_condexec_mask = 0;
25640 }
25641 }
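
/* For illustration only (simplified, assumed output): three consecutive
   COND_EXEC insns predicated on eq, eq and ne give
   arm_condexec_masklen == 3 and arm_condexec_mask == 0b011, so the block
   is emitted roughly as

	itte	eq
	moveq	r0, #1
	addeq	r1, r1, #1
	movne	r0, #0  */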
25642
25643 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25644 UNITS_PER_WORD bytes wide. */
25645 static unsigned int
25646 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25647 {
25648 if (IS_VPR_REGNUM (regno))
25649 return CEIL (GET_MODE_SIZE (mode), 2);
25650
25651 if (TARGET_32BIT
25652 && regno > PC_REGNUM
25653 && regno != FRAME_POINTER_REGNUM
25654 && regno != ARG_POINTER_REGNUM
25655 && !IS_VFP_REGNUM (regno))
25656 return 1;
25657
25658 return ARM_NUM_REGS (mode);
25659 }
25660
25661 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25662 static bool
25663 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25664 {
25665 if (GET_MODE_CLASS (mode) == MODE_CC)
25666 return (regno == CC_REGNUM
25667 || (TARGET_VFP_BASE
25668 && regno == VFPCC_REGNUM));
25669
25670 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25671 return false;
25672
25673 if (IS_VPR_REGNUM (regno))
25674 return VALID_MVE_PRED_MODE (mode);
25675
25676 if (TARGET_THUMB1)
25677 /* For the Thumb we only allow values bigger than SImode in
25678 registers 0 - 6, so that there is always a second low
25679 register available to hold the upper part of the value.
25680 We probably ought to ensure that the register is the
25681 start of an even-numbered register pair.
25682 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25683
25684 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25685 {
25686 if (mode == DFmode || mode == DImode)
25687 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25688
25689 if (mode == HFmode || mode == BFmode || mode == HImode
25690 || mode == SFmode || mode == SImode)
25691 return VFP_REGNO_OK_FOR_SINGLE (regno);
25692
25693 if (TARGET_NEON)
25694 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25695 || (VALID_NEON_QREG_MODE (mode)
25696 && NEON_REGNO_OK_FOR_QUAD (regno))
25697 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25698 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25699 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25700 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25701 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25702 if (TARGET_HAVE_MVE)
25703 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25704 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25705 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25706
25707 return false;
25708 }
25709
25710 if (TARGET_REALLY_IWMMXT)
25711 {
25712 if (IS_IWMMXT_GR_REGNUM (regno))
25713 return mode == SImode;
25714
25715 if (IS_IWMMXT_REGNUM (regno))
25716 return VALID_IWMMXT_REG_MODE (mode);
25717 }
25718
25719 /* We allow almost any value to be stored in the general registers.
25720 Restrict doubleword quantities to even register pairs in ARM state
25721 so that we can use ldrd. The same restriction applies for MVE
25722 in order to support Armv8.1-M Mainline instructions.
25723 Do not allow very large Neon structure opaque modes in general
25724 registers; they would use too many. */
25725 if (regno <= LAST_ARM_REGNUM)
25726 {
25727 if (ARM_NUM_REGS (mode) > 4)
25728 return false;
25729
25730 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25731 return true;
25732
25733 return !((TARGET_LDRD || TARGET_CDE)
25734 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25735 }
25736
25737 if (regno == FRAME_POINTER_REGNUM
25738 || regno == ARG_POINTER_REGNUM)
25739 /* We only allow integers in the fake hard registers. */
25740 return GET_MODE_CLASS (mode) == MODE_INT;
25741
25742 return false;
25743 }
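
/* For example (illustrative): in ARM state with TARGET_LDRD, a DImode
   value may live in r0:r1 or r2:r3 but not in r1:r2, so that the pair can
   always be accessed with a single ldrd/strd.  */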
25744
25745 /* Implement TARGET_MODES_TIEABLE_P. */
25746
25747 static bool
25748 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25749 {
25750 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25751 return true;
25752
25753 if (TARGET_HAVE_MVE
25754 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25755 return true;
25756
25757 /* We specifically want to allow elements of "structure" modes to
25758 be tieable to the structure. This more general condition allows
25759 other rarer situations too. */
25760 if ((TARGET_NEON
25761 && (VALID_NEON_DREG_MODE (mode1)
25762 || VALID_NEON_QREG_MODE (mode1)
25763 || VALID_NEON_STRUCT_MODE (mode1))
25764 && (VALID_NEON_DREG_MODE (mode2)
25765 || VALID_NEON_QREG_MODE (mode2)
25766 || VALID_NEON_STRUCT_MODE (mode2)))
25767 || (TARGET_HAVE_MVE
25768 && (VALID_MVE_MODE (mode1)
25769 || VALID_MVE_STRUCT_MODE (mode1))
25770 && (VALID_MVE_MODE (mode2)
25771 || VALID_MVE_STRUCT_MODE (mode2))))
25772 return true;
25773
25774 return false;
25775 }
25776
25777 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25778 not used in arm mode. */
25779
25780 enum reg_class
25781 arm_regno_class (int regno)
25782 {
25783 if (regno == PC_REGNUM)
25784 return NO_REGS;
25785
25786 if (IS_VPR_REGNUM (regno))
25787 return VPR_REG;
25788
25789 if (IS_PAC_REGNUM (regno))
25790 return PAC_REG;
25791
25792 if (TARGET_THUMB1)
25793 {
25794 if (regno == STACK_POINTER_REGNUM)
25795 return STACK_REG;
25796 if (regno == CC_REGNUM)
25797 return CC_REG;
25798 if (regno < 8)
25799 return LO_REGS;
25800 return HI_REGS;
25801 }
25802
25803 if (TARGET_THUMB2 && regno < 8)
25804 return LO_REGS;
25805
25806 if ( regno <= LAST_ARM_REGNUM
25807 || regno == FRAME_POINTER_REGNUM
25808 || regno == ARG_POINTER_REGNUM)
25809 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25810
25811 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25812 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25813
25814 if (IS_VFP_REGNUM (regno))
25815 {
25816 if (regno <= D7_VFP_REGNUM)
25817 return VFP_D0_D7_REGS;
25818 else if (regno <= LAST_LO_VFP_REGNUM)
25819 return VFP_LO_REGS;
25820 else
25821 return VFP_HI_REGS;
25822 }
25823
25824 if (IS_IWMMXT_REGNUM (regno))
25825 return IWMMXT_REGS;
25826
25827 if (IS_IWMMXT_GR_REGNUM (regno))
25828 return IWMMXT_GR_REGS;
25829
25830 return NO_REGS;
25831 }
25832
25833 /* Handle a special case when computing the offset
25834 of an argument from the frame pointer. */
25835 int
25836 arm_debugger_arg_offset (int value, rtx addr)
25837 {
25838 rtx_insn *insn;
25839
25840 /* We are only interested if dbxout_parms() failed to compute the offset. */
25841 if (value != 0)
25842 return 0;
25843
25844 /* We can only cope with the case where the address is held in a register. */
25845 if (!REG_P (addr))
25846 return 0;
25847
25848 /* If we are using the frame pointer to point at the argument, then
25849 an offset of 0 is correct. */
25850 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25851 return 0;
25852
25853 /* If we are using the stack pointer to point at the
25854 argument, then an offset of 0 is correct. */
25855 /* ??? Check this is consistent with thumb2 frame layout. */
25856 if ((TARGET_THUMB || !frame_pointer_needed)
25857 && REGNO (addr) == SP_REGNUM)
25858 return 0;
25859
25860 /* Oh dear. The argument is pointed to by a register rather
25861 than being held in a register, or being stored at a known
25862 offset from the frame pointer. Since GDB only understands
25863 those two kinds of argument we must translate the address
25864 held in the register into an offset from the frame pointer.
25865 We do this by searching through the insns for the function
25866 looking to see where this register gets its value. If the
25867 register is initialized from the frame pointer plus an offset
25868 then we are in luck and we can continue, otherwise we give up.
25869
25870 This code is exercised by producing debugging information
25871 for a function with arguments like this:
25872
25873 double func (double a, double b, int c, double d) {return d;}
25874
25875 Without this code the stab for parameter 'd' will be set to
25876 an offset of 0 from the frame pointer, rather than 8. */
25877
25878 /* The if() statement says:
25879
25880 If the insn is a normal instruction
25881 and if the insn is setting the value in a register
25882 and if the register being set is the register holding the address of the argument
25883 and if the address is computed by an addition
25884 that involves adding to a register
25885 which is the frame pointer
25886 a constant integer
25887
25888 then... */
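
/* In RTL terms, the insn being searched for looks roughly like
   (illustrative):

	(set (reg Rn)
	     (plus (reg hard-frame-pointer) (const_int 8)))

   in which case VALUE becomes 8 below.  */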
25889
25890 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25891 {
25892 if ( NONJUMP_INSN_P (insn)
25893 && GET_CODE (PATTERN (insn)) == SET
25894 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25895 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25896 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25897 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25898 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25899 )
25900 {
25901 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25902
25903 break;
25904 }
25905 }
25906
25907 if (value == 0)
25908 {
25909 debug_rtx (addr);
25910 warning (0, "unable to compute real location of stacked parameter");
25911 value = 8; /* XXX magic hack */
25912 }
25913
25914 return value;
25915 }
25916 \f
25917 /* Implement TARGET_PROMOTED_TYPE. */
25918
25919 static tree
25920 arm_promoted_type (const_tree t)
25921 {
25922 if (SCALAR_FLOAT_TYPE_P (t)
25923 && TYPE_PRECISION (t) == 16
25924 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25925 return float_type_node;
25926 return NULL_TREE;
25927 }
25928
25929 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25930 This simply adds HFmode as a supported mode; even though we don't
25931 implement arithmetic on this type directly, it's supported by
25932 optabs conversions, much the way the double-word arithmetic is
25933 special-cased in the default hook. */
25934
25935 static bool
25936 arm_scalar_mode_supported_p (scalar_mode mode)
25937 {
25938 if (mode == HFmode)
25939 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25940 else if (ALL_FIXED_POINT_MODE_P (mode))
25941 return true;
25942 else
25943 return default_scalar_mode_supported_p (mode);
25944 }
25945
25946 /* Set the value of FLT_EVAL_METHOD.
25947 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25948
25949 0: evaluate all operations and constants, whose semantic type has at
25950 most the range and precision of type float, to the range and
25951 precision of float; evaluate all other operations and constants to
25952 the range and precision of the semantic type;
25953
25954 N, where _FloatN is a supported interchange floating type:
25955 evaluate all operations and constants, whose semantic type has at
25956 most the range and precision of _FloatN type, to the range and
25957 precision of the _FloatN type; evaluate all other operations and
25958 constants to the range and precision of the semantic type;
25959
25960 If we have the ARMv8.2-A extensions then we support _Float16 in native
25961 precision, so we should set this to 16. Otherwise, we support the type,
25962 but want to evaluate expressions in float precision, so set this to
25963 0. */
25964
25965 static enum flt_eval_method
25966 arm_excess_precision (enum excess_precision_type type)
25967 {
25968 switch (type)
25969 {
25970 case EXCESS_PRECISION_TYPE_FAST:
25971 case EXCESS_PRECISION_TYPE_STANDARD:
25972 /* We can calculate either in 16-bit range and precision or
25973 32-bit range and precision. Make that decision based on whether
25974 we have native support for the ARMv8.2-A 16-bit floating-point
25975 instructions or not. */
25976 return (TARGET_VFP_FP16INST
25977 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25978 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25979 case EXCESS_PRECISION_TYPE_IMPLICIT:
25980 case EXCESS_PRECISION_TYPE_FLOAT16:
25981 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25982 default:
25983 gcc_unreachable ();
25984 }
25985 return FLT_EVAL_METHOD_UNPREDICTABLE;
25986 }
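
/* For illustration only (a user-level example, not part of the compiler):
   given

	_Float16 f (_Float16 a, _Float16 b) { return a * b + b; }

   a target with the ARMv8.2-A FP16 instructions evaluates the expression
   entirely in _Float16 (FLT_EVAL_METHOD == 16), while other targets
   promote each operation to float and convert back (FLT_EVAL_METHOD == 0).  */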
25987
25988
25989 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25990 _Float16 if we are using anything other than ieee format for 16-bit
25991 floating point. Otherwise, punt to the default implementation. */
25992 static opt_scalar_float_mode
25993 arm_floatn_mode (int n, bool extended)
25994 {
25995 if (!extended && n == 16)
25996 {
25997 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25998 return HFmode;
25999 return opt_scalar_float_mode ();
26000 }
26001
26002 return default_floatn_mode (n, extended);
26003 }
26004
26005
26006 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26007 not to early-clobber SRC registers in the process.
26008
26009 We assume that the operands described by SRC and DEST represent a
26010 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26011 number of components into which the copy has been decomposed. */
26012 void
26013 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26014 {
26015 unsigned int i;
26016
26017 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26018 || REGNO (operands[0]) < REGNO (operands[1]))
26019 {
26020 for (i = 0; i < count; i++)
26021 {
26022 operands[2 * i] = dest[i];
26023 operands[2 * i + 1] = src[i];
26024 }
26025 }
26026 else
26027 {
26028 for (i = 0; i < count; i++)
26029 {
26030 operands[2 * i] = dest[count - i - 1];
26031 operands[2 * i + 1] = src[count - i - 1];
26032 }
26033 }
26034 }
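
/* For example (illustrative): copying a two-register value from {d1, d2}
   into {d2, d3} overlaps and the destination starts at the higher register
   number, so the copies are ordered d2 -> d3 first, then d1 -> d2, to avoid
   clobbering d2 before it has been read.  */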
26035
26036 /* Split operands into moves from op[1] + op[2] into op[0]. */
26037
26038 void
26039 neon_split_vcombine (rtx operands[3])
26040 {
26041 unsigned int dest = REGNO (operands[0]);
26042 unsigned int src1 = REGNO (operands[1]);
26043 unsigned int src2 = REGNO (operands[2]);
26044 machine_mode halfmode = GET_MODE (operands[1]);
26045 unsigned int halfregs = REG_NREGS (operands[1]);
26046 rtx destlo, desthi;
26047
26048 if (src1 == dest && src2 == dest + halfregs)
26049 {
26050 /* No-op move. Can't split to nothing; emit something. */
26051 emit_note (NOTE_INSN_DELETED);
26052 return;
26053 }
26054
26055 /* Preserve register attributes for variable tracking. */
26056 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26057 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26058 GET_MODE_SIZE (halfmode));
26059
26060 /* Special case of reversed high/low parts. Use VSWP. */
26061 if (src2 == dest && src1 == dest + halfregs)
26062 {
26063 rtx x = gen_rtx_SET (destlo, operands[1]);
26064 rtx y = gen_rtx_SET (desthi, operands[2]);
26065 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26066 return;
26067 }
26068
26069 if (!reg_overlap_mentioned_p (operands[2], destlo))
26070 {
26071 /* Try to avoid unnecessary moves if part of the result
26072 is in the right place already. */
26073 if (src1 != dest)
26074 emit_move_insn (destlo, operands[1]);
26075 if (src2 != dest + halfregs)
26076 emit_move_insn (desthi, operands[2]);
26077 }
26078 else
26079 {
26080 if (src2 != dest + halfregs)
26081 emit_move_insn (desthi, operands[2]);
26082 if (src1 != dest)
26083 emit_move_insn (destlo, operands[1]);
26084 }
26085 }
26086 \f
26087 /* Return the number (counting from 0) of
26088 the least significant set bit in MASK. */
26089
26090 inline static int
26091 number_of_first_bit_set (unsigned mask)
26092 {
26093 return ctz_hwi (mask);
26094 }
26095
26096 /* Like emit_multi_reg_push, but allowing for a different set of
26097 registers to be described as saved. MASK is the set of registers
26098 to be saved; REAL_REGS is the set of registers to be described as
26099 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26100
26101 static rtx_insn *
26102 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26103 {
26104 unsigned long regno;
26105 rtx par[10], tmp, reg;
26106 rtx_insn *insn;
26107 int i, j;
26108
26109 /* Build the parallel of the registers actually being stored. */
26110 for (i = 0; mask; ++i, mask &= mask - 1)
26111 {
26112 regno = ctz_hwi (mask);
26113 reg = gen_rtx_REG (SImode, regno);
26114
26115 if (i == 0)
26116 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26117 else
26118 tmp = gen_rtx_USE (VOIDmode, reg);
26119
26120 par[i] = tmp;
26121 }
26122
26123 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26124 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26125 tmp = gen_frame_mem (BLKmode, tmp);
26126 tmp = gen_rtx_SET (tmp, par[0]);
26127 par[0] = tmp;
26128
26129 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26130 insn = emit_insn (tmp);
26131
26132 /* Always build the stack adjustment note for unwind info. */
26133 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26134 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26135 par[0] = tmp;
26136
26137 /* Build the parallel of the registers recorded as saved for unwind. */
26138 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26139 {
26140 regno = ctz_hwi (real_regs);
26141 reg = gen_rtx_REG (SImode, regno);
26142
26143 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26144 tmp = gen_frame_mem (SImode, tmp);
26145 tmp = gen_rtx_SET (tmp, reg);
26146 RTX_FRAME_RELATED_P (tmp) = 1;
26147 par[j + 1] = tmp;
26148 }
26149
26150 if (j == 0)
26151 tmp = par[0];
26152 else
26153 {
26154 RTX_FRAME_RELATED_P (par[0]) = 1;
26155 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26156 }
26157
26158 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26159
26160 return insn;
26161 }
26162
26163 /* Emit code to push or pop registers to or from the stack. F is the
26164 assembly file. MASK is the registers to pop. */
26165 static void
26166 thumb_pop (FILE *f, unsigned long mask)
26167 {
26168 int regno;
26169 int lo_mask = mask & 0xFF;
26170
26171 gcc_assert (mask);
26172
26173 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26174 {
26175 /* Special case. Do not generate a POP PC statement here; do it in
26176 thumb_exit (). */
26177 thumb_exit (f, -1);
26178 return;
26179 }
26180
26181 fprintf (f, "\tpop\t{");
26182
26183 /* Look at the low registers first. */
26184 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26185 {
26186 if (lo_mask & 1)
26187 {
26188 asm_fprintf (f, "%r", regno);
26189
26190 if ((lo_mask & ~1) != 0)
26191 fprintf (f, ", ");
26192 }
26193 }
26194
26195 if (mask & (1 << PC_REGNUM))
26196 {
26197 /* Catch popping the PC. */
26198 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26199 || IS_CMSE_ENTRY (arm_current_func_type ()))
26200 {
26201 /* The PC is never popped directly; instead
26202 it is popped into r3 and then BX is used. */
26203 fprintf (f, "}\n");
26204
26205 thumb_exit (f, -1);
26206
26207 return;
26208 }
26209 else
26210 {
26211 if (mask & 0xFF)
26212 fprintf (f, ", ");
26213
26214 asm_fprintf (f, "%r", PC_REGNUM);
26215 }
26216 }
26217
26218 fprintf (f, "}\n");
26219 }
26220
26221 /* Generate code to return from a thumb function.
26222 If 'reg_containing_return_addr' is -1, then the return address is
26223 actually on the stack, at the stack pointer.
26224
26225 Note: do not forget to update length attribute of corresponding insn pattern
26226 when changing assembly output (eg. length attribute of epilogue_insns when
26227 updating Armv8-M Baseline Security Extensions register clearing
26228 sequences). */
26229 static void
26230 thumb_exit (FILE *f, int reg_containing_return_addr)
26231 {
26232 unsigned regs_available_for_popping;
26233 unsigned regs_to_pop;
26234 int pops_needed;
26235 unsigned available;
26236 unsigned required;
26237 machine_mode mode;
26238 int size;
26239 int restore_a4 = FALSE;
26240
26241 /* Compute the registers we need to pop. */
26242 regs_to_pop = 0;
26243 pops_needed = 0;
26244
26245 if (reg_containing_return_addr == -1)
26246 {
26247 regs_to_pop |= 1 << LR_REGNUM;
26248 ++pops_needed;
26249 }
26250
26251 if (TARGET_BACKTRACE)
26252 {
26253 /* Restore the (ARM) frame pointer and stack pointer. */
26254 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26255 pops_needed += 2;
26256 }
26257
26258 /* If there is nothing to pop then just emit the BX instruction and
26259 return. */
26260 if (pops_needed == 0)
26261 {
26262 if (crtl->calls_eh_return)
26263 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26264
26265 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26266 {
26267 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26268 emitted by cmse_nonsecure_entry_clear_before_return (). */
26269 if (!TARGET_HAVE_FPCXT_CMSE)
26270 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26271 reg_containing_return_addr);
26272 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26273 }
26274 else
26275 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26276 return;
26277 }
26278 /* Otherwise if we are not supporting interworking and we have not created
26279 a backtrace structure and the function was not entered in ARM mode then
26280 just pop the return address straight into the PC. */
26281 else if (!TARGET_INTERWORK
26282 && !TARGET_BACKTRACE
26283 && !is_called_in_ARM_mode (current_function_decl)
26284 && !crtl->calls_eh_return
26285 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26286 {
26287 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26288 return;
26289 }
26290
26291 /* Find out how many of the (return) argument registers we can corrupt. */
26292 regs_available_for_popping = 0;
26293
26294 /* If returning via __builtin_eh_return, the bottom three registers
26295 all contain information needed for the return. */
26296 if (crtl->calls_eh_return)
26297 size = 12;
26298 else
26299 {
26300 /* Where possible, deduce the registers used from the function's
26301 return value. This is more reliable than examining
26302 df_regs_ever_live_p (), because that will be set if the register is
26303 ever used in the function, not just if the register is used
26304 to hold a return value. */
26305
26306 if (crtl->return_rtx != 0)
26307 mode = GET_MODE (crtl->return_rtx);
26308 else
26309 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26310
26311 size = GET_MODE_SIZE (mode);
26312
26313 if (size == 0)
26314 {
26315 /* In a void function we can use any argument register.
26316 In a function that returns a structure on the stack
26317 we can use the second and third argument registers. */
26318 if (mode == VOIDmode)
26319 regs_available_for_popping =
26320 (1 << ARG_REGISTER (1))
26321 | (1 << ARG_REGISTER (2))
26322 | (1 << ARG_REGISTER (3));
26323 else
26324 regs_available_for_popping =
26325 (1 << ARG_REGISTER (2))
26326 | (1 << ARG_REGISTER (3));
26327 }
26328 else if (size <= 4)
26329 regs_available_for_popping =
26330 (1 << ARG_REGISTER (2))
26331 | (1 << ARG_REGISTER (3));
26332 else if (size <= 8)
26333 regs_available_for_popping =
26334 (1 << ARG_REGISTER (3));
26335 }
26336
26337 /* Match registers to be popped with registers into which we pop them. */
26338 for (available = regs_available_for_popping,
26339 required = regs_to_pop;
26340 required != 0 && available != 0;
26341 available &= ~(available & - available),
26342 required &= ~(required & - required))
26343 -- pops_needed;
26344
26345 /* If we have any popping registers left over, remove them. */
26346 if (available > 0)
26347 regs_available_for_popping &= ~available;
26348
26349 /* Otherwise if we need another popping register we can use
26350 the fourth argument register. */
26351 else if (pops_needed)
26352 {
26353 /* If we have not found any free argument registers and
26354 reg a4 contains the return address, we must move it. */
26355 if (regs_available_for_popping == 0
26356 && reg_containing_return_addr == LAST_ARG_REGNUM)
26357 {
26358 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26359 reg_containing_return_addr = LR_REGNUM;
26360 }
26361 else if (size > 12)
26362 {
26363 /* Register a4 is being used to hold part of the return value,
26364 but we have dire need of a free, low register. */
26365 restore_a4 = TRUE;
26366
26367 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26368 }
26369
26370 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26371 {
26372 /* The fourth argument register is available. */
26373 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26374
26375 --pops_needed;
26376 }
26377 }
26378
26379 /* Pop as many registers as we can. */
26380 thumb_pop (f, regs_available_for_popping);
26381
26382 /* Process the registers we popped. */
26383 if (reg_containing_return_addr == -1)
26384 {
26385 /* The return address was popped into the lowest numbered register. */
26386 regs_to_pop &= ~(1 << LR_REGNUM);
26387
26388 reg_containing_return_addr =
26389 number_of_first_bit_set (regs_available_for_popping);
26390
26391 /* Remove this register from the mask of available registers, so that
26392 the return address will not be corrupted by further pops. */
26393 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26394 }
26395
26396 /* If we popped other registers then handle them here. */
26397 if (regs_available_for_popping)
26398 {
26399 int frame_pointer;
26400
26401 /* Work out which register currently contains the frame pointer. */
26402 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26403
26404 /* Move it into the correct place. */
26405 asm_fprintf (f, "\tmov\t%r, %r\n",
26406 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26407
26408 /* (Temporarily) remove it from the mask of popped registers. */
26409 regs_available_for_popping &= ~(1 << frame_pointer);
26410 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26411
26412 if (regs_available_for_popping)
26413 {
26414 int stack_pointer;
26415
26416 /* We popped the stack pointer as well,
26417 find the register that contains it. */
26418 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26419
26420 /* Move it into the stack register. */
26421 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26422
26423 /* At this point we have popped all necessary registers, so
26424 do not worry about restoring regs_available_for_popping
26425 to its correct value:
26426
26427 assert (pops_needed == 0)
26428 assert (regs_available_for_popping == (1 << frame_pointer))
26429 assert (regs_to_pop == (1 << STACK_POINTER)) */
26430 }
26431 else
26432 {
26433 /* Since we have just moved the popped value into the frame
26434 pointer, the popping register is available for reuse, and
26435 we know that we still have the stack pointer left to pop. */
26436 regs_available_for_popping |= (1 << frame_pointer);
26437 }
26438 }
26439
26440 /* If we still have registers left on the stack, but we no longer have
26441 any registers into which we can pop them, then we must move the return
26442 address into the link register and make available the register that
26443 contained it. */
26444 if (regs_available_for_popping == 0 && pops_needed > 0)
26445 {
26446 regs_available_for_popping |= 1 << reg_containing_return_addr;
26447
26448 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26449 reg_containing_return_addr);
26450
26451 reg_containing_return_addr = LR_REGNUM;
26452 }
26453
26454 /* If we have registers left on the stack then pop some more.
26455 We know that at most we will want to pop FP and SP. */
26456 if (pops_needed > 0)
26457 {
26458 int popped_into;
26459 int move_to;
26460
26461 thumb_pop (f, regs_available_for_popping);
26462
26463 /* We have popped either FP or SP.
26464 Move whichever one it is into the correct register. */
26465 popped_into = number_of_first_bit_set (regs_available_for_popping);
26466 move_to = number_of_first_bit_set (regs_to_pop);
26467
26468 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26469 --pops_needed;
26470 }
26471
26472 /* If we still have not popped everything then we must have only
26473 had one register available to us and we are now popping the SP. */
26474 if (pops_needed > 0)
26475 {
26476 int popped_into;
26477
26478 thumb_pop (f, regs_available_for_popping);
26479
26480 popped_into = number_of_first_bit_set (regs_available_for_popping);
26481
26482 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26483 /*
26484 assert (regs_to_pop == (1 << STACK_POINTER))
26485 assert (pops_needed == 1)
26486 */
26487 }
26488
26489 /* If necessary restore the a4 register. */
26490 if (restore_a4)
26491 {
26492 if (reg_containing_return_addr != LR_REGNUM)
26493 {
26494 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26495 reg_containing_return_addr = LR_REGNUM;
26496 }
26497
26498 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26499 }
26500
26501 if (crtl->calls_eh_return)
26502 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26503
26504 /* Return to caller. */
26505 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26506 {
26507 /* This is for the cases where LR is not being used to contain the return
26508 address. It may therefore contain information that we might not want
26509 to leak, hence it must be cleared. The value in R0 will never be a
26510 secret at this point, so it is safe to use it, see the clearing code
26511 in cmse_nonsecure_entry_clear_before_return (). */
26512 if (reg_containing_return_addr != LR_REGNUM)
26513 asm_fprintf (f, "\tmov\tlr, r0\n");
26514
26515 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26516 by cmse_nonsecure_entry_clear_before_return (). */
26517 if (!TARGET_HAVE_FPCXT_CMSE)
26518 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26519 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26520 }
26521 else
26522 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26523 }
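
/* For illustration only (roughly, for a void interworking function whose
   return address is still on the stack): with r0-r2 free the return
   address is popped into a low register and we return with BX, e.g.

	pop	{r0}
	bx	r0  */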
26524 \f
26525 /* Scan INSN just before assembler is output for it.
26526 For Thumb-1, we track the status of the condition codes; this
26527 information is used in the cbranchsi4_insn pattern. */
26528 void
26529 thumb1_final_prescan_insn (rtx_insn *insn)
26530 {
26531 if (flag_print_asm_name)
26532 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26533 INSN_ADDRESSES (INSN_UID (insn)));
26534 /* Don't overwrite the previous setter when we get to a cbranch. */
26535 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26536 {
26537 enum attr_conds conds;
26538
26539 if (cfun->machine->thumb1_cc_insn)
26540 {
26541 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26542 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26543 CC_STATUS_INIT;
26544 }
26545 conds = get_attr_conds (insn);
26546 if (conds == CONDS_SET)
26547 {
26548 rtx set = single_set (insn);
26549 cfun->machine->thumb1_cc_insn = insn;
26550 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26551 cfun->machine->thumb1_cc_op1 = const0_rtx;
26552 cfun->machine->thumb1_cc_mode = CC_NZmode;
26553 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26554 {
26555 rtx src1 = XEXP (SET_SRC (set), 1);
26556 if (src1 == const0_rtx)
26557 cfun->machine->thumb1_cc_mode = CCmode;
26558 }
26559 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26560 {
26561 /* Record the src register operand instead of dest because
26562 cprop_hardreg pass propagates src. */
26563 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26564 }
26565 }
26566 else if (conds != CONDS_NOCOND)
26567 cfun->machine->thumb1_cc_insn = NULL_RTX;
26568 }
26569
26570 /* Check if unexpected far jump is used. */
26571 if (cfun->machine->lr_save_eliminated
26572 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26573 internal_error("Unexpected thumb1 far jump");
26574 }
26575
26576 int
26577 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26578 {
26579 unsigned HOST_WIDE_INT mask = 0xff;
26580 int i;
26581
26582 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26583 if (val == 0) /* XXX */
26584 return 0;
26585
26586 for (i = 0; i < 25; i++)
26587 if ((val & (mask << i)) == val)
26588 return 1;
26589
26590 return 0;
26591 }
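
/* For example (illustrative): 0x00ff0000 is 0xff << 16, so this returns 1;
   0x00100100 has set bits more than eight positions apart, so it
   returns 0.  */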
26592
26593 /* Returns nonzero if the current function contains,
26594 or might contain a far jump. */
26595 static int
26596 thumb_far_jump_used_p (void)
26597 {
26598 rtx_insn *insn;
26599 bool far_jump = false;
26600 unsigned int func_size = 0;
26601
26602 /* If we have already decided that far jumps may be used,
26603 do not bother checking again, and always return true even if
26604 it turns out that they are not being used. Once we have made
26605 the decision that far jumps are present (and that hence the link
26606 register will be pushed onto the stack) we cannot go back on it. */
26607 if (cfun->machine->far_jump_used)
26608 return 1;
26609
26610 /* If this function is not being called from the prologue/epilogue
26611 generation code then it must be being called from the
26612 INITIAL_ELIMINATION_OFFSET macro. */
26613 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26614 {
26615 /* In this case we know that we are being asked about the elimination
26616 of the arg pointer register. If that register is not being used,
26617 then there are no arguments on the stack, and we do not have to
26618 worry that a far jump might force the prologue to push the link
26619 register, changing the stack offsets. In this case we can just
26620 return false, since the presence of far jumps in the function will
26621 not affect stack offsets.
26622
26623 If the arg pointer is live (or if it was live, but has now been
26624 eliminated and so set to dead) then we do have to test to see if
26625 the function might contain a far jump. This test can lead to some
26626 false negatives, since before reload is completed the length of
26627 branch instructions is not known, so gcc defaults to returning their
26628 longest length, which in turn sets the far jump attribute to true.
26629
26630 A false negative will not result in bad code being generated, but it
26631 will result in a needless push and pop of the link register. We
26632 hope that this does not occur too often.
26633
26634 If we need doubleword stack alignment this could affect the other
26635 elimination offsets so we can't risk getting it wrong. */
26636 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26637 cfun->machine->arg_pointer_live = 1;
26638 else if (!cfun->machine->arg_pointer_live)
26639 return 0;
26640 }
26641
26642 /* We should not change far_jump_used during or after reload, as there is
26643 no chance to change stack frame layout. */
26644 if (reload_in_progress || reload_completed)
26645 return 0;
26646
26647 /* Check to see if the function contains a branch
26648 insn with the far jump attribute set. */
26649 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26650 {
26651 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26652 {
26653 far_jump = true;
26654 }
26655 func_size += get_attr_length (insn);
26656 }
26657
26658 /* The far_jump attribute will always be true for thumb1 before the
26659 shorten_branch pass, so checking the far_jump attribute before
26660 that pass is not very useful.
26661
26662 The following heuristic tries to estimate more accurately whether a far
26663 jump may finally be used. The heuristic is very conservative, as there
26664 is no chance to roll back a decision not to use far jumps.
26665
26666 Thumb1 long branch offsets are -2048 to 2046. The worst case is that
26667 each 2-byte insn is associated with a 4-byte constant pool entry.
26668 Using 2048/3 as the function size threshold is conservative enough. */
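
/* Worked example (illustrative): each 2-byte insn may drag in a 4-byte
   literal pool entry, i.e. up to 6 bytes of code per 2 bytes counted in
   FUNC_SIZE, so 3 * FUNC_SIZE >= 2048 means the function could span the
   whole +/-2KB branch range and a far jump must be assumed.  */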
26669 if (far_jump)
26670 {
26671 if ((func_size * 3) >= 2048)
26672 {
26673 /* Record the fact that we have decided that
26674 the function does use far jumps. */
26675 cfun->machine->far_jump_used = 1;
26676 return 1;
26677 }
26678 }
26679
26680 return 0;
26681 }
26682
26683 /* Return nonzero if FUNC must be entered in ARM mode. */
26684 static bool
26685 is_called_in_ARM_mode (tree func)
26686 {
26687 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26688
26689 /* Ignore the problem about functions whose address is taken. */
26690 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26691 return true;
26692
26693 #ifdef ARM_PE
26694 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26695 #else
26696 return false;
26697 #endif
26698 }
26699
26700 /* Given the stack offsets and register mask in OFFSETS, decide how
26701 many additional registers to push instead of subtracting a constant
26702 from SP. For epilogues the principle is the same except we use pop.
26703 FOR_PROLOGUE indicates which we're generating. */
26704 static int
26705 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26706 {
26707 HOST_WIDE_INT amount;
26708 unsigned long live_regs_mask = offsets->saved_regs_mask;
26709 /* Extract a mask of the ones we can give to the Thumb's push/pop
26710 instruction. */
26711 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26712 /* Then count how many other high registers will need to be pushed. */
26713 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26714 int n_free, reg_base, size;
26715
26716 if (!for_prologue && frame_pointer_needed)
26717 amount = offsets->locals_base - offsets->saved_regs;
26718 else
26719 amount = offsets->outgoing_args - offsets->saved_regs;
26720
26721 /* If the stack frame size is 512 exactly, we can save one load
26722 instruction, which should make this a win even when optimizing
26723 for speed. */
26724 if (!optimize_size && amount != 512)
26725 return 0;
26726
26727 /* Can't do this if there are high registers to push. */
26728 if (high_regs_pushed != 0)
26729 return 0;
26730
26731 /* Shouldn't do it in the prologue if no registers would normally
26732 be pushed at all. In the epilogue, also allow it if we'll have
26733 a pop insn for the PC. */
26734 if (l_mask == 0
26735 && (for_prologue
26736 || TARGET_BACKTRACE
26737 || (live_regs_mask & 1 << LR_REGNUM) == 0
26738 || TARGET_INTERWORK
26739 || crtl->args.pretend_args_size != 0))
26740 return 0;
26741
26742 /* Don't do this if thumb_expand_prologue wants to emit instructions
26743 between the push and the stack frame allocation. */
26744 if (for_prologue
26745 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26746 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26747 return 0;
26748
26749 reg_base = 0;
26750 n_free = 0;
26751 if (!for_prologue)
26752 {
26753 size = arm_size_return_regs ();
26754 reg_base = ARM_NUM_INTS (size);
26755 live_regs_mask >>= reg_base;
26756 }
26757
26758 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26759 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26760 {
26761 live_regs_mask >>= 1;
26762 n_free++;
26763 }
26764
26765 if (n_free == 0)
26766 return 0;
26767 gcc_assert (amount / 4 * 4 == amount);
26768
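/* A Thumb-1 SP add/sub immediate covers 0..508 in steps of 4, so once the
   residual adjustment drops below 512 it fits in a single insn. For
   example, amount == 516 with n_free == 2 returns (516 - 508) / 4 == 2,
   pushing two extra registers and leaving a 508-byte adjustment. */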
26769 if (amount >= 512 && (amount - n_free * 4) < 512)
26770 return (amount - 508) / 4;
26771 if (amount <= n_free * 4)
26772 return amount / 4;
26773 return 0;
26774 }
26775
26776 /* The bits which aren't usefully expanded as rtl. */
26777 const char *
26778 thumb1_unexpanded_epilogue (void)
26779 {
26780 arm_stack_offsets *offsets;
26781 int regno;
26782 unsigned long live_regs_mask = 0;
26783 int high_regs_pushed = 0;
26784 int extra_pop;
26785 int had_to_push_lr;
26786 int size;
26787
26788 if (cfun->machine->return_used_this_function != 0)
26789 return "";
26790
26791 if (IS_NAKED (arm_current_func_type ()))
26792 return "";
26793
26794 offsets = arm_get_frame_offsets ();
26795 live_regs_mask = offsets->saved_regs_mask;
26796 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26797
26798 /* We can deduce the registers used from the function's return value.
26799 This is more reliable than examining df_regs_ever_live_p () because that
26800 will be set if the register is ever used in the function, not just if
26801 the register is used to hold a return value. */
26802 size = arm_size_return_regs ();
26803
26804 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26805 if (extra_pop > 0)
26806 {
26807 unsigned long extra_mask = (1 << extra_pop) - 1;
26808 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26809 }
26810
26811 /* The prolog may have pushed some high registers to use as
26812 work registers. e.g. the testsuite file:
26813 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26814 compiles to produce:
26815 push {r4, r5, r6, r7, lr}
26816 mov r7, r9
26817 mov r6, r8
26818 push {r6, r7}
26819 as part of the prolog. We have to undo that pushing here. */
26820
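/* For the example above, the corresponding undo sequence emitted here is
   roughly:
   pop {r6, r7}
   mov r9, r7
   mov r8, r6
   before the remaining low registers and the PC are popped. */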
26821 if (high_regs_pushed)
26822 {
26823 unsigned long mask = live_regs_mask & 0xff;
26824 int next_hi_reg;
26825
26826 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26827
26828 if (mask == 0)
26829 /* Oh dear! We have no low registers into which we can pop
26830 high registers! */
26831 internal_error
26832 ("no low registers available for popping high registers");
26833
26834 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26835 if (live_regs_mask & (1 << next_hi_reg))
26836 break;
26837
26838 while (high_regs_pushed)
26839 {
26840 /* Find lo register(s) into which the high register(s) can
26841 be popped. */
26842 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26843 {
26844 if (mask & (1 << regno))
26845 high_regs_pushed--;
26846 if (high_regs_pushed == 0)
26847 break;
26848 }
26849
26850 if (high_regs_pushed == 0 && regno >= 0)
26851 mask &= ~((1 << regno) - 1);
26852
26853 /* Pop the values into the low register(s). */
26854 thumb_pop (asm_out_file, mask);
26855
26856 /* Move the value(s) into the high registers. */
26857 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26858 {
26859 if (mask & (1 << regno))
26860 {
26861 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26862 regno);
26863
26864 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26865 next_hi_reg--)
26866 if (live_regs_mask & (1 << next_hi_reg))
26867 break;
26868 }
26869 }
26870 }
26871 live_regs_mask &= ~0x0f00;
26872 }
26873
26874 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26875 live_regs_mask &= 0xff;
26876
26877 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26878 {
26879 /* Pop the return address into the PC. */
26880 if (had_to_push_lr)
26881 live_regs_mask |= 1 << PC_REGNUM;
26882
26883 /* Either no argument registers were pushed or a backtrace
26884 structure was created which includes an adjusted stack
26885 pointer, so just pop everything. */
26886 if (live_regs_mask)
26887 thumb_pop (asm_out_file, live_regs_mask);
26888
26889 /* We have either just popped the return address into the
26890 PC or it was kept in LR for the entire function.
26891 Note that thumb_pop has already called thumb_exit if the
26892 PC was in the list. */
26893 if (!had_to_push_lr)
26894 thumb_exit (asm_out_file, LR_REGNUM);
26895 }
26896 else
26897 {
26898 /* Pop everything but the return address. */
26899 if (live_regs_mask)
26900 thumb_pop (asm_out_file, live_regs_mask);
26901
26902 if (had_to_push_lr)
26903 {
26904 if (size > 12)
26905 {
26906 /* We have no free low regs, so save one. */
26907 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26908 LAST_ARG_REGNUM);
26909 }
26910
26911 /* Get the return address into a temporary register. */
26912 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26913
26914 if (size > 12)
26915 {
26916 /* Move the return address to lr. */
26917 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26918 LAST_ARG_REGNUM);
26919 /* Restore the low register. */
26920 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26921 IP_REGNUM);
26922 regno = LR_REGNUM;
26923 }
26924 else
26925 regno = LAST_ARG_REGNUM;
26926 }
26927 else
26928 regno = LR_REGNUM;
26929
26930 /* Remove the argument registers that were pushed onto the stack. */
26931 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26932 SP_REGNUM, SP_REGNUM,
26933 crtl->args.pretend_args_size);
26934
26935 thumb_exit (asm_out_file, regno);
26936 }
26937
26938 return "";
26939 }
26940
26941 /* Functions to save and restore machine-specific function data. */
26942 static struct machine_function *
26943 arm_init_machine_status (void)
26944 {
26945 struct machine_function *machine;
26946 machine = ggc_cleared_alloc<machine_function> ();
26947
26948 #if ARM_FT_UNKNOWN != 0
26949 machine->func_type = ARM_FT_UNKNOWN;
26950 #endif
26951 machine->static_chain_stack_bytes = -1;
26952 machine->pacspval_needed = 0;
26953 return machine;
26954 }
26955
26956 /* Return an RTX indicating where the return address to the
26957 calling function can be found. */
26958 rtx
26959 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26960 {
26961 if (count != 0)
26962 return NULL_RTX;
26963
26964 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26965 }
26966
26967 /* Do anything needed before RTL is emitted for each function. */
26968 void
26969 arm_init_expanders (void)
26970 {
26971 /* Arrange to initialize and mark the machine per-function status. */
26972 init_machine_status = arm_init_machine_status;
26973
26974 /* This is to stop the combine pass optimizing away the alignment
26975 adjustment of va_arg. */
26976 /* ??? It is claimed that this should not be necessary. */
26977 if (cfun)
26978 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26979 }
26980
26981 /* Return TRUE if FUNC is compiled for a different mode (ARM vs. Thumb). */
26982
26983 bool
26984 arm_change_mode_p (tree func)
26985 {
26986 if (TREE_CODE (func) != FUNCTION_DECL)
26987 return false;
26988
26989 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26990
26991 if (!callee_tree)
26992 callee_tree = target_option_default_node;
26993
26994 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26995 int flags = callee_opts->x_target_flags;
26996
26997 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26998 }
26999
27000 /* Like arm_compute_initial_elimination_offset. Simpler because there
27001 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27002 to point at the base of the local variables after static stack
27003 space for a function has been allocated. */
27004
27005 HOST_WIDE_INT
27006 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27007 {
27008 arm_stack_offsets *offsets;
27009
27010 offsets = arm_get_frame_offsets ();
27011
27012 switch (from)
27013 {
27014 case ARG_POINTER_REGNUM:
27015 switch (to)
27016 {
27017 case STACK_POINTER_REGNUM:
27018 return offsets->outgoing_args - offsets->saved_args;
27019
27020 case FRAME_POINTER_REGNUM:
27021 return offsets->soft_frame - offsets->saved_args;
27022
27023 case ARM_HARD_FRAME_POINTER_REGNUM:
27024 return offsets->saved_regs - offsets->saved_args;
27025
27026 case THUMB_HARD_FRAME_POINTER_REGNUM:
27027 return offsets->locals_base - offsets->saved_args;
27028
27029 default:
27030 gcc_unreachable ();
27031 }
27032 break;
27033
27034 case FRAME_POINTER_REGNUM:
27035 switch (to)
27036 {
27037 case STACK_POINTER_REGNUM:
27038 return offsets->outgoing_args - offsets->soft_frame;
27039
27040 case ARM_HARD_FRAME_POINTER_REGNUM:
27041 return offsets->saved_regs - offsets->soft_frame;
27042
27043 case THUMB_HARD_FRAME_POINTER_REGNUM:
27044 return offsets->locals_base - offsets->soft_frame;
27045
27046 default:
27047 gcc_unreachable ();
27048 }
27049 break;
27050
27051 default:
27052 gcc_unreachable ();
27053 }
27054 }
27055
27056 /* Generate the function's prologue. */
27057
27058 void
27059 thumb1_expand_prologue (void)
27060 {
27061 rtx_insn *insn;
27062
27063 HOST_WIDE_INT amount;
27064 HOST_WIDE_INT size;
27065 arm_stack_offsets *offsets;
27066 unsigned long func_type;
27067 int regno;
27068 unsigned long live_regs_mask;
27069 unsigned long l_mask;
27070 unsigned high_regs_pushed = 0;
27071 bool lr_needs_saving;
27072
27073 func_type = arm_current_func_type ();
27074
27075 /* Naked functions don't have prologues. */
27076 if (IS_NAKED (func_type))
27077 {
27078 if (flag_stack_usage_info)
27079 current_function_static_stack_size = 0;
27080 return;
27081 }
27082
27083 if (IS_INTERRUPT (func_type))
27084 {
27085 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27086 return;
27087 }
27088
27089 if (is_called_in_ARM_mode (current_function_decl))
27090 emit_insn (gen_prologue_thumb1_interwork ());
27091
27092 offsets = arm_get_frame_offsets ();
27093 live_regs_mask = offsets->saved_regs_mask;
27094 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27095
27096 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27097 l_mask = live_regs_mask & 0x40ff;
27098 /* Then count how many other high registers will need to be pushed. */
27099 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27100
27101 if (crtl->args.pretend_args_size)
27102 {
27103 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27104
27105 if (cfun->machine->uses_anonymous_args)
27106 {
27107 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27108 unsigned long mask;
27109
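/* Select the highest NUM_PUSHES argument registers. For example,
   pretend_args_size == 8 gives num_pushes == 2 and mask == 0xc,
   i.e. push {r2, r3}. */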
27110 mask = 1ul << (LAST_ARG_REGNUM + 1);
27111 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27112
27113 insn = thumb1_emit_multi_reg_push (mask, 0);
27114 }
27115 else
27116 {
27117 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27118 stack_pointer_rtx, x));
27119 }
27120 RTX_FRAME_RELATED_P (insn) = 1;
27121 }
27122
27123 if (TARGET_BACKTRACE)
27124 {
27125 HOST_WIDE_INT offset = 0;
27126 unsigned work_register;
27127 rtx work_reg, x, arm_hfp_rtx;
27128
27129 /* We have been asked to create a stack backtrace structure.
27130 The code looks like this:
27131
27132 0 .align 2
27133 0 func:
27134 0 sub SP, #16 Reserve space for 4 registers.
27135 2 push {R7} Push low registers.
27136 4 add R7, SP, #20 Get the stack pointer before the push.
27137 6 str R7, [SP, #8] Store the stack pointer
27138 (before reserving the space).
27139 8 mov R7, PC Get hold of the start of this code + 12.
27140 10 str R7, [SP, #16] Store it.
27141 12 mov R7, FP Get hold of the current frame pointer.
27142 14 str R7, [SP, #4] Store it.
27143 16 mov R7, LR Get hold of the current return address.
27144 18 str R7, [SP, #12] Store it.
27145 20 add R7, SP, #16 Point at the start of the
27146 backtrace structure.
27147 22 mov FP, R7 Put this value into the frame pointer. */
27148
27149 work_register = thumb_find_work_register (live_regs_mask);
27150 work_reg = gen_rtx_REG (SImode, work_register);
27151 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27152
27153 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27154 stack_pointer_rtx, GEN_INT (-16)));
27155 RTX_FRAME_RELATED_P (insn) = 1;
27156
27157 if (l_mask)
27158 {
27159 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27160 RTX_FRAME_RELATED_P (insn) = 1;
27161 lr_needs_saving = false;
27162
27163 offset = bit_count (l_mask) * UNITS_PER_WORD;
27164 }
27165
27166 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27167 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27168
27169 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27170 x = gen_frame_mem (SImode, x);
27171 emit_move_insn (x, work_reg);
27172
27173 /* Make sure that the instruction fetching the PC is in the right place
27174 to calculate "start of backtrace creation code + 12". */
27175 /* ??? The stores using the common WORK_REG ought to be enough to
27176 prevent the scheduler from doing anything weird. Failing that
27177 we could always move all of the following into an UNSPEC_VOLATILE. */
27178 if (l_mask)
27179 {
27180 x = gen_rtx_REG (SImode, PC_REGNUM);
27181 emit_move_insn (work_reg, x);
27182
27183 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27184 x = gen_frame_mem (SImode, x);
27185 emit_move_insn (x, work_reg);
27186
27187 emit_move_insn (work_reg, arm_hfp_rtx);
27188
27189 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27190 x = gen_frame_mem (SImode, x);
27191 emit_move_insn (x, work_reg);
27192 }
27193 else
27194 {
27195 emit_move_insn (work_reg, arm_hfp_rtx);
27196
27197 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27198 x = gen_frame_mem (SImode, x);
27199 emit_move_insn (x, work_reg);
27200
27201 x = gen_rtx_REG (SImode, PC_REGNUM);
27202 emit_move_insn (work_reg, x);
27203
27204 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27205 x = gen_frame_mem (SImode, x);
27206 emit_move_insn (x, work_reg);
27207 }
27208
27209 x = gen_rtx_REG (SImode, LR_REGNUM);
27210 emit_move_insn (work_reg, x);
27211
27212 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27213 x = gen_frame_mem (SImode, x);
27214 emit_move_insn (x, work_reg);
27215
27216 x = GEN_INT (offset + 12);
27217 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27218
27219 emit_move_insn (arm_hfp_rtx, work_reg);
27220 }
27221 /* Optimization: If we are not pushing any low registers but we are going
27222 to push some high registers then delay our first push. This will just
27223 be a push of LR and we can combine it with the push of the first high
27224 register. */
27225 else if ((l_mask & 0xff) != 0
27226 || (high_regs_pushed == 0 && lr_needs_saving))
27227 {
27228 unsigned long mask = l_mask;
27229 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27230 insn = thumb1_emit_multi_reg_push (mask, mask);
27231 RTX_FRAME_RELATED_P (insn) = 1;
27232 lr_needs_saving = false;
27233 }
27234
27235 if (high_regs_pushed)
27236 {
27237 unsigned pushable_regs;
27238 unsigned next_hi_reg;
27239 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27240 : crtl->args.info.nregs;
27241 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27242
27243 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27244 if (live_regs_mask & (1 << next_hi_reg))
27245 break;
27246
27247 /* Here we need to mask out registers used for passing arguments,
27248 even if they could otherwise be pushed. This is to avoid using
27249 them to stash the high registers, since such a stash could
27250 clobber argument values that are still live. */
27251 pushable_regs = l_mask & (~arg_regs_mask);
27252 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27253
27254 /* Normally, LR can be used as a scratch register once it has been
27255 saved; but if the function examines its own return address then
27256 the value is still live and we need to avoid using it. */
27257 bool return_addr_live
27258 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27259 LR_REGNUM);
27260
27261 if (lr_needs_saving || return_addr_live)
27262 pushable_regs &= ~(1 << LR_REGNUM);
27263
27264 if (pushable_regs == 0)
27265 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27266
27267 while (high_regs_pushed > 0)
27268 {
27269 unsigned long real_regs_mask = 0;
27270 unsigned long push_mask = 0;
27271
27272 for (regno = LR_REGNUM; regno >= 0; regno --)
27273 {
27274 if (pushable_regs & (1 << regno))
27275 {
27276 emit_move_insn (gen_rtx_REG (SImode, regno),
27277 gen_rtx_REG (SImode, next_hi_reg));
27278
27279 high_regs_pushed --;
27280 real_regs_mask |= (1 << next_hi_reg);
27281 push_mask |= (1 << regno);
27282
27283 if (high_regs_pushed)
27284 {
27285 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27286 next_hi_reg --)
27287 if (live_regs_mask & (1 << next_hi_reg))
27288 break;
27289 }
27290 else
27291 break;
27292 }
27293 }
27294
27295 /* If we had to find a work register and we have not yet
27296 saved the LR then add it to the list of regs to push. */
27297 if (lr_needs_saving)
27298 {
27299 push_mask |= 1 << LR_REGNUM;
27300 real_regs_mask |= 1 << LR_REGNUM;
27301 lr_needs_saving = false;
27302 /* If the return address is not live at this point, we
27303 can add LR to the list of registers that we can use
27304 for pushes. */
27305 if (!return_addr_live)
27306 pushable_regs |= 1 << LR_REGNUM;
27307 }
27308
27309 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27310 RTX_FRAME_RELATED_P (insn) = 1;
27311 }
27312 }
27313
27314 /* Load the pic register before setting the frame pointer,
27315 so we can use r7 as a temporary work register. */
27316 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27317 arm_load_pic_register (live_regs_mask, NULL_RTX);
27318
27319 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27320 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27321 stack_pointer_rtx);
27322
27323 size = offsets->outgoing_args - offsets->saved_args;
27324 if (flag_stack_usage_info)
27325 current_function_static_stack_size = size;
27326
27327 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27328 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27329 || flag_stack_clash_protection)
27330 && size)
27331 sorry ("%<-fstack-check=specific%> for Thumb-1");
27332
27333 amount = offsets->outgoing_args - offsets->saved_regs;
27334 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27335 if (amount)
27336 {
27337 if (amount < 512)
27338 {
27339 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27340 GEN_INT (- amount)));
27341 RTX_FRAME_RELATED_P (insn) = 1;
27342 }
27343 else
27344 {
27345 rtx reg, dwarf;
27346
27347 /* The stack decrement is too big for an immediate value in a single
27348 insn. In theory we could issue multiple subtracts, but after
27349 three of them it becomes more space efficient to place the full
27350 value in the constant pool and load into a register. (Also the
27351 ARM debugger really likes to see only one stack decrement per
27352 function). So instead we look for a scratch register into which
27353 we can load the decrement, and then we subtract this from the
27354 stack pointer. Unfortunately on the thumb the only available
27355 scratch registers are the argument registers, and we cannot use
27356 these as they may hold arguments to the function. Instead we
27357 attempt to locate a call preserved register which is used by this
27358 function. If we can find one, then we know that it will have
27359 been pushed at the start of the prologue and so we can corrupt
27360 it now. */
27361 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27362 if (live_regs_mask & (1 << regno))
27363 break;
27364
27365 gcc_assert(regno <= LAST_LO_REGNUM);
27366
27367 reg = gen_rtx_REG (SImode, regno);
27368
27369 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27370
27371 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27372 stack_pointer_rtx, reg));
27373
27374 dwarf = gen_rtx_SET (stack_pointer_rtx,
27375 plus_constant (Pmode, stack_pointer_rtx,
27376 -amount));
27377 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27378 RTX_FRAME_RELATED_P (insn) = 1;
27379 }
27380 }
27381
27382 if (frame_pointer_needed)
27383 thumb_set_frame_pointer (offsets);
27384
27385 /* If we are profiling, make sure no instructions are scheduled before
27386 the call to mcount. Similarly if the user has requested no
27387 scheduling in the prolog. Similarly if we want non-call exceptions
27388 using the EABI unwinder, to prevent faulting instructions from being
27389 swapped with a stack adjustment. */
27390 if (crtl->profile || !TARGET_SCHED_PROLOG
27391 || (arm_except_unwind_info (&global_options) == UI_TARGET
27392 && cfun->can_throw_non_call_exceptions))
27393 emit_insn (gen_blockage ());
27394
27395 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27396 if (live_regs_mask & 0xff)
27397 cfun->machine->lr_save_eliminated = 0;
27398 }
27399
27400 /* Clear caller saved registers not used to pass return values and leaked
27401 condition flags before exiting a cmse_nonsecure_entry function. */
27402
27403 void
27404 cmse_nonsecure_entry_clear_before_return (void)
27405 {
27406 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27407 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27408 uint32_t padding_bits_to_clear = 0;
27409 auto_sbitmap to_clear_bitmap (maxregno + 1);
27410 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27411 tree result_type;
27412
27413 bitmap_clear (to_clear_bitmap);
27414 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27415 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
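/* For example, an entry function returning a 32-bit result keeps r0 (it is
   removed from the bitmap below), so r1-r3 and, unless it is needed as a
   scratch below, ip are cleared, along with the low VFP registers when
   VFP is in use. */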
27416
27417 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27418 registers. */
27419 if (clear_vfpregs)
27420 {
27421 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27422
27423 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27424
27425 if (!TARGET_HAVE_FPCXT_CMSE)
27426 {
27427 /* Make sure we don't clear the two scratch registers used to clear
27428 the relevant FPSCR bits in output_return_instruction. */
27429 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27430 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27431 emit_use (gen_rtx_REG (SImode, 4));
27432 bitmap_clear_bit (to_clear_bitmap, 4);
27433 }
27434 }
27435
27436 /* If the user has defined registers to be caller saved, these are no longer
27437 restored by the function before returning and must thus be cleared for
27438 security purposes. */
27439 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27440 {
27441 /* We do not touch registers that can be used to pass arguments as per
27442 the AAPCS, since these should never be made callee-saved by user
27443 options. */
27444 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27445 continue;
27446 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27447 continue;
27448 if (!callee_saved_reg_p (regno)
27449 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27450 || TARGET_HARD_FLOAT))
27451 bitmap_set_bit (to_clear_bitmap, regno);
27452 }
27453
27454 /* Make sure we do not clear the registers used to return the result in. */
27455 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27456 if (!VOID_TYPE_P (result_type))
27457 {
27458 uint64_t to_clear_return_mask;
27459 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27460
27461 /* No need to check that we return in registers, because we don't
27462 support returning on stack yet. */
27463 gcc_assert (REG_P (result_rtl));
27464 to_clear_return_mask
27465 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27466 &padding_bits_to_clear);
27467 if (to_clear_return_mask)
27468 {
27469 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27470 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27471 {
27472 if (to_clear_return_mask & (1ULL << regno))
27473 bitmap_clear_bit (to_clear_bitmap, regno);
27474 }
27475 }
27476 }
27477
27478 if (padding_bits_to_clear != 0)
27479 {
27480 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27481 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27482
27483 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27484 returning a composite type, which only uses r0. Let's make sure that
27485 r1-r3 are cleared too. */
27486 bitmap_clear (to_clear_arg_regs_bitmap);
27487 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27488 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27489 }
27490
27491 /* Clear full registers that leak before returning. */
27492 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27493 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27494 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27495 clearing_reg);
27496 }
27497
27498 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27499 single POP instruction can be generated. LR should be replaced by PC.
27500 All the required checks are already done by USE_RETURN_INSN (); hence
27501 all we really need to decide here is whether a single register or
27502 multiple registers are being popped on return. */
27503 void
27504 thumb2_expand_return (bool simple_return)
27505 {
27506 int i, num_regs;
27507 unsigned long saved_regs_mask;
27508 arm_stack_offsets *offsets;
27509
27510 offsets = arm_get_frame_offsets ();
27511 saved_regs_mask = offsets->saved_regs_mask;
27512
27513 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27514 if (saved_regs_mask & (1 << i))
27515 num_regs++;
27516
27517 if (!simple_return && saved_regs_mask)
27518 {
27519 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27520 functions or adapt code to handle according to ACLE. This path should
27521 not be reachable for cmse_nonsecure_entry functions though we prefer
27522 to assert it for now to ensure that future code changes do not silently
27523 change this behavior. */
27524 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27525 if (arm_current_function_pac_enabled_p ())
27526 {
27527 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27528 arm_emit_multi_reg_pop (saved_regs_mask);
27529 emit_insn (gen_aut_nop ());
27530 emit_jump_insn (simple_return_rtx);
27531 }
27532 else if (num_regs == 1)
27533 {
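/* A single saved register: pop the return address straight into PC as
   part of the return, using a (return) parallel with a post-increment
   load from the stack. */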
27534 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27535 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27536 rtx addr = gen_rtx_MEM (SImode,
27537 gen_rtx_POST_INC (SImode,
27538 stack_pointer_rtx));
27539 set_mem_alias_set (addr, get_frame_alias_set ());
27540 XVECEXP (par, 0, 0) = ret_rtx;
27541 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27542 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27543 emit_jump_insn (par);
27544 }
27545 else
27546 {
27547 saved_regs_mask &= ~ (1 << LR_REGNUM);
27548 saved_regs_mask |= (1 << PC_REGNUM);
27549 arm_emit_multi_reg_pop (saved_regs_mask);
27550 }
27551 }
27552 else
27553 {
27554 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27555 cmse_nonsecure_entry_clear_before_return ();
27556 emit_jump_insn (simple_return_rtx);
27557 }
27558 }
27559
27560 void
27561 thumb1_expand_epilogue (void)
27562 {
27563 HOST_WIDE_INT amount;
27564 arm_stack_offsets *offsets;
27565 int regno;
27566
27567 /* Naked functions don't have epilogues. */
27568 if (IS_NAKED (arm_current_func_type ()))
27569 return;
27570
27571 offsets = arm_get_frame_offsets ();
27572 amount = offsets->outgoing_args - offsets->saved_regs;
27573
27574 if (frame_pointer_needed)
27575 {
27576 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27577 amount = offsets->locals_base - offsets->saved_regs;
27578 }
27579 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27580
27581 gcc_assert (amount >= 0);
27582 if (amount)
27583 {
27584 emit_insn (gen_blockage ());
27585
27586 if (amount < 512)
27587 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27588 GEN_INT (amount)));
27589 else
27590 {
27591 /* r3 is always free in the epilogue. */
27592 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27593
27594 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27595 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27596 }
27597 }
27598
27599 /* Emit a USE (stack_pointer_rtx), so that
27600 the stack adjustment will not be deleted. */
27601 emit_insn (gen_force_register_use (stack_pointer_rtx));
27602
27603 if (crtl->profile || !TARGET_SCHED_PROLOG)
27604 emit_insn (gen_blockage ());
27605
27606 /* Emit a clobber for each insn that will be restored in the epilogue,
27607 so that flow2 will get register lifetimes correct. */
27608 for (regno = 0; regno < 13; regno++)
27609 if (reg_needs_saving_p (regno))
27610 emit_clobber (gen_rtx_REG (SImode, regno));
27611
27612 if (! df_regs_ever_live_p (LR_REGNUM))
27613 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27614
27615 /* Clear all caller-saved regs that are not used to return. */
27616 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27617 cmse_nonsecure_entry_clear_before_return ();
27618 }
27619
27620 /* Epilogue code for APCS frame. */
27621 static void
27622 arm_expand_epilogue_apcs_frame (bool really_return)
27623 {
27624 unsigned long func_type;
27625 unsigned long saved_regs_mask;
27626 int num_regs = 0;
27627 int i;
27628 int floats_from_frame = 0;
27629 arm_stack_offsets *offsets;
27630
27631 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27632 func_type = arm_current_func_type ();
27633
27634 /* Get frame offsets for ARM. */
27635 offsets = arm_get_frame_offsets ();
27636 saved_regs_mask = offsets->saved_regs_mask;
27637
27638 /* Find the offset of the floating-point save area in the frame. */
27639 floats_from_frame
27640 = (offsets->saved_args
27641 + arm_compute_static_chain_stack_bytes ()
27642 - offsets->frame);
27643
27644 /* Compute how many core registers saved and how far away the floats are. */
27645 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27646 if (saved_regs_mask & (1 << i))
27647 {
27648 num_regs++;
27649 floats_from_frame += 4;
27650 }
27651
27652 if (TARGET_VFP_BASE)
27653 {
27654 int start_reg;
27655 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27656
27657 /* The offset is from IP_REGNUM. */
27658 int saved_size = arm_get_vfp_saved_size ();
27659 if (saved_size > 0)
27660 {
27661 rtx_insn *insn;
27662 floats_from_frame += saved_size;
27663 insn = emit_insn (gen_addsi3 (ip_rtx,
27664 hard_frame_pointer_rtx,
27665 GEN_INT (-floats_from_frame)));
27666 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27667 ip_rtx, hard_frame_pointer_rtx);
27668 }
27669
27670 /* Generate VFP register multi-pop. */
27671 start_reg = FIRST_VFP_REGNUM;
27672
27673 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27674 /* Look for a case where a reg does not need restoring. */
27675 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27676 {
27677 if (start_reg != i)
27678 arm_emit_vfp_multi_reg_pop (start_reg,
27679 (i - start_reg) / 2,
27680 gen_rtx_REG (SImode,
27681 IP_REGNUM));
27682 start_reg = i + 2;
27683 }
27684
27685 /* Restore the remaining regs that we have discovered (or possibly
27686 even all of them, if the conditional in the for loop never
27687 fired). */
27688 if (start_reg != i)
27689 arm_emit_vfp_multi_reg_pop (start_reg,
27690 (i - start_reg) / 2,
27691 gen_rtx_REG (SImode, IP_REGNUM));
27692 }
27693
27694 if (TARGET_IWMMXT)
27695 {
27696 /* The frame pointer is guaranteed to be non-double-word aligned, as
27697 it is set to double-word-aligned old_stack_pointer - 4. */
27698 rtx_insn *insn;
27699 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27700
27701 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27702 if (reg_needs_saving_p (i))
27703 {
27704 rtx addr = gen_frame_mem (V2SImode,
27705 plus_constant (Pmode, hard_frame_pointer_rtx,
27706 - lrm_count * 4));
27707 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27708 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27709 gen_rtx_REG (V2SImode, i),
27710 NULL_RTX);
27711 lrm_count += 2;
27712 }
27713 }
27714
27715 /* saved_regs_mask should contain IP, which holds the old stack pointer
27716 from the time the frame was created. Since SP and IP are adjacent registers,
27717 we can restore the value directly into SP. */
27718 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27719 saved_regs_mask &= ~(1 << IP_REGNUM);
27720 saved_regs_mask |= (1 << SP_REGNUM);
27721
27722 /* There are two registers left in saved_regs_mask - LR and PC. We
27723 only need to restore LR (the return address), but to
27724 save time we can load it directly into PC, unless we need a
27725 special function exit sequence, or we are not really returning. */
27726 if (really_return
27727 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27728 && !crtl->calls_eh_return)
27729 /* Delete LR from the register mask, so that LR on
27730 the stack is loaded into the PC in the register mask. */
27731 saved_regs_mask &= ~(1 << LR_REGNUM);
27732 else
27733 saved_regs_mask &= ~(1 << PC_REGNUM);
27734
27735 num_regs = bit_count (saved_regs_mask);
27736 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27737 {
27738 rtx_insn *insn;
27739 emit_insn (gen_blockage ());
27740 /* Unwind the stack to just below the saved registers. */
27741 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27742 hard_frame_pointer_rtx,
27743 GEN_INT (- 4 * num_regs)));
27744
27745 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27746 stack_pointer_rtx, hard_frame_pointer_rtx);
27747 }
27748
27749 arm_emit_multi_reg_pop (saved_regs_mask);
27750
27751 if (IS_INTERRUPT (func_type))
27752 {
27753 /* Interrupt handlers will have pushed the
27754 IP onto the stack, so restore it now. */
27755 rtx_insn *insn;
27756 rtx addr = gen_rtx_MEM (SImode,
27757 gen_rtx_POST_INC (SImode,
27758 stack_pointer_rtx));
27759 set_mem_alias_set (addr, get_frame_alias_set ());
27760 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27761 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27762 gen_rtx_REG (SImode, IP_REGNUM),
27763 NULL_RTX);
27764 }
27765
27766 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27767 return;
27768
27769 if (crtl->calls_eh_return)
27770 emit_insn (gen_addsi3 (stack_pointer_rtx,
27771 stack_pointer_rtx,
27772 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27773
27774 if (IS_STACKALIGN (func_type))
27775 /* Restore the original stack pointer. Before prologue, the stack was
27776 realigned and the original stack pointer saved in r0. For details,
27777 see comment in arm_expand_prologue. */
27778 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27779
27780 emit_jump_insn (simple_return_rtx);
27781 }
27782
27783 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27784 function is not a sibcall. */
27785 void
27786 arm_expand_epilogue (bool really_return)
27787 {
27788 unsigned long func_type;
27789 unsigned long saved_regs_mask;
27790 int num_regs = 0;
27791 int i;
27792 int amount;
27793 arm_stack_offsets *offsets;
27794
27795 func_type = arm_current_func_type ();
27796
27797 /* Naked functions don't have epilogues. Hence, generate the return pattern and
27798 let output_return_instruction take care of instruction emission if any. */
27799 if (IS_NAKED (func_type)
27800 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27801 {
27802 if (really_return)
27803 emit_jump_insn (simple_return_rtx);
27804 return;
27805 }
27806
27807 /* If we are throwing an exception, then we really must be doing a
27808 return, so we can't tail-call. */
27809 gcc_assert (!crtl->calls_eh_return || really_return);
27810
27811 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27812 {
27813 arm_expand_epilogue_apcs_frame (really_return);
27814 return;
27815 }
27816
27817 /* Get frame offsets for ARM. */
27818 offsets = arm_get_frame_offsets ();
27819 saved_regs_mask = offsets->saved_regs_mask;
27820 num_regs = bit_count (saved_regs_mask);
27821
27822 if (frame_pointer_needed)
27823 {
27824 rtx_insn *insn;
27825 /* Restore stack pointer if necessary. */
27826 if (TARGET_ARM)
27827 {
27828 /* In ARM mode, frame pointer points to first saved register.
27829 Restore stack pointer to last saved register. */
27830 amount = offsets->frame - offsets->saved_regs;
27831
27832 /* Force out any pending memory operations that reference stacked data
27833 before stack de-allocation occurs. */
27834 emit_insn (gen_blockage ());
27835 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27836 hard_frame_pointer_rtx,
27837 GEN_INT (amount)));
27838 arm_add_cfa_adjust_cfa_note (insn, amount,
27839 stack_pointer_rtx,
27840 hard_frame_pointer_rtx);
27841
27842 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27843 deleted. */
27844 emit_insn (gen_force_register_use (stack_pointer_rtx));
27845 }
27846 else
27847 {
27848 /* In Thumb-2 mode, the frame pointer points to the last saved
27849 register. */
27850 amount = offsets->locals_base - offsets->saved_regs;
27851 if (amount)
27852 {
27853 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27854 hard_frame_pointer_rtx,
27855 GEN_INT (amount)));
27856 arm_add_cfa_adjust_cfa_note (insn, amount,
27857 hard_frame_pointer_rtx,
27858 hard_frame_pointer_rtx);
27859 }
27860
27861 /* Force out any pending memory operations that reference stacked data
27862 before stack de-allocation occurs. */
27863 emit_insn (gen_blockage ());
27864 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27865 hard_frame_pointer_rtx));
27866 arm_add_cfa_adjust_cfa_note (insn, 0,
27867 stack_pointer_rtx,
27868 hard_frame_pointer_rtx);
27869 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27870 deleted. */
27871 emit_insn (gen_force_register_use (stack_pointer_rtx));
27872 }
27873 }
27874 else
27875 {
27876 /* Pop off outgoing args and local frame to adjust stack pointer to
27877 last saved register. */
27878 amount = offsets->outgoing_args - offsets->saved_regs;
27879 if (amount)
27880 {
27881 rtx_insn *tmp;
27882 /* Force out any pending memory operations that reference stacked data
27883 before stack de-allocation occurs. */
27884 emit_insn (gen_blockage ());
27885 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27886 stack_pointer_rtx,
27887 GEN_INT (amount)));
27888 arm_add_cfa_adjust_cfa_note (tmp, amount,
27889 stack_pointer_rtx, stack_pointer_rtx);
27890 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27891 not deleted. */
27892 emit_insn (gen_force_register_use (stack_pointer_rtx));
27893 }
27894 }
27895
27896 if (TARGET_VFP_BASE)
27897 {
27898 /* Generate VFP register multi-pop. */
27899 int end_reg = LAST_VFP_REGNUM + 1;
27900
27901 /* Scan the registers in reverse order. We need to match
27902 any groupings made in the prologue and generate matching
27903 vldm operations. Groups must be matched because,
27904 unlike pop, vldm can only handle consecutive registers. */
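/* For example, if d8-d9 and d11-d12 were saved but d10 was not, two
   separate VLDM pops are generated, one covering d11-d12 and one
   covering d8-d9. */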
27905 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27906 /* Look for a case where a reg does not need restoring. */
27907 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27908 {
27909 /* Restore the regs discovered so far (from reg+2 to
27910 end_reg). */
27911 if (end_reg > i + 2)
27912 arm_emit_vfp_multi_reg_pop (i + 2,
27913 (end_reg - (i + 2)) / 2,
27914 stack_pointer_rtx);
27915 end_reg = i;
27916 }
27917
27918 /* Restore the remaining regs that we have discovered (or possibly
27919 even all of them, if the conditional in the for loop never
27920 fired). */
27921 if (end_reg > i + 2)
27922 arm_emit_vfp_multi_reg_pop (i + 2,
27923 (end_reg - (i + 2)) / 2,
27924 stack_pointer_rtx);
27925 }
27926
27927 if (TARGET_IWMMXT)
27928 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27929 if (reg_needs_saving_p (i))
27930 {
27931 rtx_insn *insn;
27932 rtx addr = gen_rtx_MEM (V2SImode,
27933 gen_rtx_POST_INC (SImode,
27934 stack_pointer_rtx));
27935 set_mem_alias_set (addr, get_frame_alias_set ());
27936 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27937 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27938 gen_rtx_REG (V2SImode, i),
27939 NULL_RTX);
27940 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27941 stack_pointer_rtx, stack_pointer_rtx);
27942 }
27943
27944 if (saved_regs_mask)
27945 {
27946 rtx insn;
27947 bool return_in_pc = false;
27948
27949 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27950 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27951 && !IS_CMSE_ENTRY (func_type)
27952 && !IS_STACKALIGN (func_type)
27953 && really_return
27954 && crtl->args.pretend_args_size == 0
27955 && saved_regs_mask & (1 << LR_REGNUM)
27956 && !crtl->calls_eh_return
27957 && !arm_current_function_pac_enabled_p ())
27958 {
27959 saved_regs_mask &= ~(1 << LR_REGNUM);
27960 saved_regs_mask |= (1 << PC_REGNUM);
27961 return_in_pc = true;
27962 }
27963
27964 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27965 {
27966 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27967 if (saved_regs_mask & (1 << i))
27968 {
27969 rtx addr = gen_rtx_MEM (SImode,
27970 gen_rtx_POST_INC (SImode,
27971 stack_pointer_rtx));
27972 set_mem_alias_set (addr, get_frame_alias_set ());
27973
27974 if (i == PC_REGNUM)
27975 {
27976 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27977 XVECEXP (insn, 0, 0) = ret_rtx;
27978 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27979 addr);
27980 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27981 insn = emit_jump_insn (insn);
27982 }
27983 else
27984 {
27985 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27986 addr));
27987 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27988 gen_rtx_REG (SImode, i),
27989 NULL_RTX);
27990 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27991 stack_pointer_rtx,
27992 stack_pointer_rtx);
27993 }
27994 }
27995 }
27996 else
27997 {
27998 if (TARGET_LDRD
27999 && current_tune->prefer_ldrd_strd
28000 && !optimize_function_for_size_p (cfun))
28001 {
28002 if (TARGET_THUMB2)
28003 thumb2_emit_ldrd_pop (saved_regs_mask);
28004 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28005 arm_emit_ldrd_pop (saved_regs_mask);
28006 else
28007 arm_emit_multi_reg_pop (saved_regs_mask);
28008 }
28009 else
28010 arm_emit_multi_reg_pop (saved_regs_mask);
28011 }
28012
28013 if (return_in_pc)
28014 return;
28015 }
28016
28017 amount
28018 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
28019 if (amount)
28020 {
28021 int i, j;
28022 rtx dwarf = NULL_RTX;
28023 rtx_insn *tmp =
28024 emit_insn (gen_addsi3 (stack_pointer_rtx,
28025 stack_pointer_rtx,
28026 GEN_INT (amount)));
28027
28028 RTX_FRAME_RELATED_P (tmp) = 1;
28029
28030 if (cfun->machine->uses_anonymous_args)
28031 {
28032 /* Restore pretend args. Refer to arm_expand_prologue for how the
28033 pretend args are saved on the stack. */
28034 int num_regs = crtl->args.pretend_args_size / 4;
28035 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
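/* For example, num_regs == 2 gives (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and
   r3 were saved as pretend args and get REG_CFA_RESTORE notes. */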
28036 for (j = 0, i = 0; j < num_regs; i++)
28037 if (saved_regs_mask & (1 << i))
28038 {
28039 rtx reg = gen_rtx_REG (SImode, i);
28040 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28041 j++;
28042 }
28043 REG_NOTES (tmp) = dwarf;
28044 }
28045 arm_add_cfa_adjust_cfa_note (tmp, amount,
28046 stack_pointer_rtx, stack_pointer_rtx);
28047 }
28048
28049 if (IS_CMSE_ENTRY (func_type))
28050 {
28051 /* CMSE_ENTRY always returns. */
28052 gcc_assert (really_return);
28053 /* Clear all caller-saved regs that are not used to return. */
28054 cmse_nonsecure_entry_clear_before_return ();
28055
28056 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28057 VLDR. */
28058 if (TARGET_HAVE_FPCXT_CMSE)
28059 {
28060 rtx_insn *insn;
28061
28062 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28063 GEN_INT (FPCXTNS_ENUM)));
28064 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28065 plus_constant (Pmode, stack_pointer_rtx, 4));
28066 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28067 RTX_FRAME_RELATED_P (insn) = 1;
28068 }
28069 }
28070
28071 if (arm_current_function_pac_enabled_p ())
28072 emit_insn (gen_aut_nop ());
28073
28074 if (!really_return)
28075 return;
28076
28077 if (crtl->calls_eh_return)
28078 emit_insn (gen_addsi3 (stack_pointer_rtx,
28079 stack_pointer_rtx,
28080 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28081
28082 if (IS_STACKALIGN (func_type))
28083 /* Restore the original stack pointer. Before prologue, the stack was
28084 realigned and the original stack pointer saved in r0. For details,
28085 see comment in arm_expand_prologue. */
28086 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28087
28088 emit_jump_insn (simple_return_rtx);
28089 }
28090
28091 /* Implementation of insn prologue_thumb1_interwork. This is the first
28092 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28093
28094 const char *
28095 thumb1_output_interwork (void)
28096 {
28097 const char * name;
28098 FILE *f = asm_out_file;
28099
28100 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28101 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28102 == SYMBOL_REF);
28103 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28104
28105 /* Generate code sequence to switch us into Thumb mode. */
28106 /* The .code 32 directive has already been emitted by
28107 ASM_DECLARE_FUNCTION_NAME. */
28108 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28109 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28110
28111 /* Generate a label, so that the debugger will notice the
28112 change in instruction sets. This label is also used by
28113 the assembler to bypass the ARM code when this function
28114 is called from a Thumb encoded function elsewhere in the
28115 same file. Hence the definition of STUB_NAME here must
28116 agree with the definition in gas/config/tc-arm.c. */
28117
28118 #define STUB_NAME ".real_start_of"
28119
28120 fprintf (f, "\t.code\t16\n");
28121 #ifdef ARM_PE
28122 if (arm_dllexport_name_p (name))
28123 name = arm_strip_name_encoding (name);
28124 #endif
28125 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28126 fprintf (f, "\t.thumb_func\n");
28127 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28128
28129 return "";
28130 }
28131
28132 /* Handle the case of a double word load into a low register from
28133 a computed memory address. The computed address may involve a
28134 register which is overwritten by the load. */
28135 const char *
28136 thumb_load_double_from_address (rtx *operands)
28137 {
28138 rtx addr;
28139 rtx base;
28140 rtx offset;
28141 rtx arg1;
28142 rtx arg2;
28143
28144 gcc_assert (REG_P (operands[0]));
28145 gcc_assert (MEM_P (operands[1]));
28146
28147 /* Get the memory address. */
28148 addr = XEXP (operands[1], 0);
28149
28150 /* Work out how the memory address is computed. */
28151 switch (GET_CODE (addr))
28152 {
28153 case REG:
28154 operands[2] = adjust_address (operands[1], SImode, 4);
28155
28156 if (REGNO (operands[0]) == REGNO (addr))
28157 {
28158 output_asm_insn ("ldr\t%H0, %2", operands);
28159 output_asm_insn ("ldr\t%0, %1", operands);
28160 }
28161 else
28162 {
28163 output_asm_insn ("ldr\t%0, %1", operands);
28164 output_asm_insn ("ldr\t%H0, %2", operands);
28165 }
28166 break;
28167
28168 case CONST:
28169 /* Compute <address> + 4 for the high order load. */
28170 operands[2] = adjust_address (operands[1], SImode, 4);
28171
28172 output_asm_insn ("ldr\t%0, %1", operands);
28173 output_asm_insn ("ldr\t%H0, %2", operands);
28174 break;
28175
28176 case PLUS:
28177 arg1 = XEXP (addr, 0);
28178 arg2 = XEXP (addr, 1);
28179
28180 if (CONSTANT_P (arg1))
28181 base = arg2, offset = arg1;
28182 else
28183 base = arg1, offset = arg2;
28184
28185 gcc_assert (REG_P (base));
28186
28187 /* Catch the case of <address> = <reg> + <reg> */
28188 if (REG_P (offset))
28189 {
28190 int reg_offset = REGNO (offset);
28191 int reg_base = REGNO (base);
28192 int reg_dest = REGNO (operands[0]);
28193
28194 /* Add the base and offset registers together into the
28195 higher destination register. */
28196 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r\n",
28197 reg_dest + 1, reg_base, reg_offset);
28198
28199 /* Load the lower destination register from the address in
28200 the higher destination register. */
28201 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]\n",
28202 reg_dest, reg_dest + 1);
28203
28204 /* Load the higher destination register from its own address
28205 plus 4. */
28206 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]\n",
28207 reg_dest + 1, reg_dest + 1);
28208 }
28209 else
28210 {
28211 /* Compute <address> + 4 for the high order load. */
28212 operands[2] = adjust_address (operands[1], SImode, 4);
28213
28214 /* If the computed address is held in the low order register
28215 then load the high order register first, otherwise always
28216 load the low order register first. */
28217 if (REGNO (operands[0]) == REGNO (base))
28218 {
28219 output_asm_insn ("ldr\t%H0, %2", operands);
28220 output_asm_insn ("ldr\t%0, %1", operands);
28221 }
28222 else
28223 {
28224 output_asm_insn ("ldr\t%0, %1", operands);
28225 output_asm_insn ("ldr\t%H0, %2", operands);
28226 }
28227 }
28228 break;
28229
28230 case LABEL_REF:
28231 /* With no registers to worry about we can just load the value
28232 directly. */
28233 operands[2] = adjust_address (operands[1], SImode, 4);
28234
28235 output_asm_insn ("ldr\t%H0, %2", operands);
28236 output_asm_insn ("ldr\t%0, %1", operands);
28237 break;
28238
28239 default:
28240 gcc_unreachable ();
28241 }
28242
28243 return "";
28244 }
28245
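/* Output an ldmia/stmia pair that copies N (2 or 3) words through the
   scratch registers in OPERANDS, sorting them first so that the register
   lists are in ascending order. */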
28246 const char *
28247 thumb_output_move_mem_multiple (int n, rtx *operands)
28248 {
28249 switch (n)
28250 {
28251 case 2:
28252 if (REGNO (operands[4]) > REGNO (operands[5]))
28253 std::swap (operands[4], operands[5]);
28254
28255 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28256 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28257 break;
28258
28259 case 3:
28260 if (REGNO (operands[4]) > REGNO (operands[5]))
28261 std::swap (operands[4], operands[5]);
28262 if (REGNO (operands[5]) > REGNO (operands[6]))
28263 std::swap (operands[5], operands[6]);
28264 if (REGNO (operands[4]) > REGNO (operands[5]))
28265 std::swap (operands[4], operands[5]);
28266
28267 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28268 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28269 break;
28270
28271 default:
28272 gcc_unreachable ();
28273 }
28274
28275 return "";
28276 }
28277
28278 /* Output a call-via instruction for thumb state. */
28279 const char *
28280 thumb_call_via_reg (rtx reg)
28281 {
28282 int regno = REGNO (reg);
28283 rtx *labelp;
28284
28285 gcc_assert (regno < LR_REGNUM);
28286
28287 /* If we are in the normal text section we can use a single instance
28288 per compilation unit. If we are doing function sections, then we need
28289 an entry per section, since we can't rely on reachability. */
28290 if (in_section == text_section)
28291 {
28292 thumb_call_reg_needed = 1;
28293
28294 if (thumb_call_via_label[regno] == NULL)
28295 thumb_call_via_label[regno] = gen_label_rtx ();
28296 labelp = thumb_call_via_label + regno;
28297 }
28298 else
28299 {
28300 if (cfun->machine->call_via[regno] == NULL)
28301 cfun->machine->call_via[regno] = gen_label_rtx ();
28302 labelp = cfun->machine->call_via + regno;
28303 }
28304
28305 output_asm_insn ("bl\t%a0", labelp);
28306 return "";
28307 }
28308
28309 /* Routines for generating rtl. */
28310 void
28311 thumb_expand_cpymemqi (rtx *operands)
28312 {
28313 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28314 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28315 HOST_WIDE_INT len = INTVAL (operands[2]);
28316 HOST_WIDE_INT offset = 0;
28317
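/* Copy in 12- and 8-byte blocks (which post-increment IN and OUT), then
   finish with word, halfword and byte copies at increasing offsets. For
   example, a 19-byte copy becomes one 12-byte block followed by a word,
   a halfword and a byte at offsets 0, 4 and 6. */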
28318 while (len >= 12)
28319 {
28320 emit_insn (gen_cpymem12b (out, in, out, in));
28321 len -= 12;
28322 }
28323
28324 if (len >= 8)
28325 {
28326 emit_insn (gen_cpymem8b (out, in, out, in));
28327 len -= 8;
28328 }
28329
28330 if (len >= 4)
28331 {
28332 rtx reg = gen_reg_rtx (SImode);
28333 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28334 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28335 len -= 4;
28336 offset += 4;
28337 }
28338
28339 if (len >= 2)
28340 {
28341 rtx reg = gen_reg_rtx (HImode);
28342 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28343 plus_constant (Pmode, in,
28344 offset))));
28345 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28346 offset)),
28347 reg));
28348 len -= 2;
28349 offset += 2;
28350 }
28351
28352 if (len)
28353 {
28354 rtx reg = gen_reg_rtx (QImode);
28355 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28356 plus_constant (Pmode, in,
28357 offset))));
28358 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28359 offset)),
28360 reg));
28361 }
28362 }
28363
28364 void
28365 thumb_reload_out_hi (rtx *operands)
28366 {
28367 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28368 }
28369
28370 /* Return the length of a function name prefix encoding that starts
28371 with the character C, or zero if C does not start an encoding. */
28372 static int
28373 arm_get_strip_length (int c)
28374 {
28375 switch (c)
28376 {
28377 ARM_NAME_ENCODING_LENGTHS
28378 default: return 0;
28379 }
28380 }
28381
28382 /* Return a pointer to a function's name with any
28383 and all prefix encodings stripped from it. */
28384 const char *
28385 arm_strip_name_encoding (const char *name)
28386 {
28387 int skip;
28388
28389 while ((skip = arm_get_strip_length (* name)))
28390 name += skip;
28391
28392 return name;
28393 }
28394
28395 /* If there is a '*' anywhere in the name's prefix, then
28396 emit the stripped name verbatim, otherwise prepend an
28397 underscore if leading underscores are being used. */
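/* For example, a name encoded as "*foo" is emitted verbatim as "foo",
   while a plain "foo" goes through %U and picks up any user-label
   prefix. */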
28398 void
28399 arm_asm_output_labelref (FILE *stream, const char *name)
28400 {
28401 int skip;
28402 int verbatim = 0;
28403
28404 while ((skip = arm_get_strip_length (* name)))
28405 {
28406 verbatim |= (*name == '*');
28407 name += skip;
28408 }
28409
28410 if (verbatim)
28411 fputs (name, stream);
28412 else
28413 asm_fprintf (stream, "%U%s", name);
28414 }
28415
28416 /* This function is used to emit an EABI tag and its associated value.
28417 We emit the numerical value of the tag in case the assembler does not
28418 support textual tags (e.g. gas prior to 2.20). If requested we include
28419 the tag name in a comment so that anyone reading the assembler output
28420 will know which tag is being set.
28421
28422 This function is not static because arm-c.cc needs it too. */
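/* For instance, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
   emits ".eabi_attribute 20, 1", followed under -fverbose-asm by an
   "@ Tag_ABI_FP_denormal" comment. */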
28423
28424 void
28425 arm_emit_eabi_attribute (const char *name, int num, int val)
28426 {
28427 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28428 if (flag_verbose_asm || flag_debug_asm)
28429 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28430 asm_fprintf (asm_out_file, "\n");
28431 }
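/* Example output (added commentary; assumes ASM_COMMENT_START is "@" as on
   typical ARM ELF configurations).  With -fverbose-asm,
   arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) emits

       .eabi_attribute 28, 1   @ Tag_ABI_VFP_args

   and without it only the numeric form ".eabi_attribute 28, 1".  */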
28432
28433 /* This function is used to print CPU tuning information as comment
28434 in assembler file. Pointers are not printed for now. */
28435
28436 void
28437 arm_print_tune_info (void)
28438 {
28439 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28440 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28441 current_tune->constant_limit);
28442 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28443 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28444 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28445 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28446 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28447 "prefetch.l1_cache_size:\t%d\n",
28448 current_tune->prefetch.l1_cache_size);
28449 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28450 "prefetch.l1_cache_line_size:\t%d\n",
28451 current_tune->prefetch.l1_cache_line_size);
28452 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28453 "prefer_constant_pool:\t%d\n",
28454 (int) current_tune->prefer_constant_pool);
28455 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28456 "branch_cost:\t(s:speed, p:predictable)\n");
28457 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28458 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28459 current_tune->branch_cost (false, false));
28460 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28461 current_tune->branch_cost (false, true));
28462 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28463 current_tune->branch_cost (true, false));
28464 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28465 current_tune->branch_cost (true, true));
28466 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28467 "prefer_ldrd_strd:\t%d\n",
28468 (int) current_tune->prefer_ldrd_strd);
28469 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28470 "logical_op_non_short_circuit:\t[%d,%d]\n",
28471 (int) current_tune->logical_op_non_short_circuit_thumb,
28472 (int) current_tune->logical_op_non_short_circuit_arm);
28473 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28474 "disparage_flag_setting_t16_encodings:\t%d\n",
28475 (int) current_tune->disparage_flag_setting_t16_encodings);
28476 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28477 "string_ops_prefer_neon:\t%d\n",
28478 (int) current_tune->string_ops_prefer_neon);
28479 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28480 "max_insns_inline_memset:\t%d\n",
28481 current_tune->max_insns_inline_memset);
28482 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28483 current_tune->fusible_ops);
28484 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28485 (int) current_tune->sched_autopref);
28486 }
28487
28488 /* The last set of target options used to emit .arch directives, etc. This
28489 could be a function-local static if it were not required to expose it as a
28490 root to the garbage collector. */
28491 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28492
28493 /* Print .arch and .arch_extension directives corresponding to the
28494 current architecture configuration. */
28495 static void
28496 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28497 {
28498 arm_build_target build_target;
28499 /* If the target options haven't changed since the last time we were called
28500 there is nothing to do. This should be sufficient to suppress the
28501 majority of redundant work. */
28502 if (last_asm_targ_options == targ_options)
28503 return;
28504
28505 last_asm_targ_options = targ_options;
28506
28507 build_target.isa = sbitmap_alloc (isa_num_bits);
28508 arm_configure_build_target (&build_target, targ_options, false);
28509
28510 if (build_target.core_name
28511 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28512 {
28513 const char* truncated_name
28514 = arm_rewrite_selected_cpu (build_target.core_name);
28515 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28516 }
28517
28518 const arch_option *arch
28519 = arm_parse_arch_option_name (all_architectures, "-march",
28520 build_target.arch_name);
28521 auto_sbitmap opt_bits (isa_num_bits);
28522
28523 gcc_assert (arch);
28524
28525 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28526 {
28527 /* Keep backward compatibility for assemblers which don't support
28528 armv7ve. Fortunately, none of the following extensions are reset
28529 by a .fpu directive. */
28530 asm_fprintf (stream, "\t.arch armv7-a\n");
28531 asm_fprintf (stream, "\t.arch_extension virt\n");
28532 asm_fprintf (stream, "\t.arch_extension idiv\n");
28533 asm_fprintf (stream, "\t.arch_extension sec\n");
28534 asm_fprintf (stream, "\t.arch_extension mp\n");
28535 }
28536 else
28537 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28538
28539 /* The .fpu directive will reset any architecture extensions from the
28540 assembler that relate to the fp/vector extensions. So put this out before
28541 any .arch_extension directives. */
28542 const char *fpu_name = (TARGET_SOFT_FLOAT
28543 ? "softvfp"
28544 : arm_identify_fpu_from_isa (build_target.isa));
28545 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28546
28547 if (!arch->common.extensions)
28548 return;
28549
28550 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28551 opt->name != NULL;
28552 opt++)
28553 {
28554 if (!opt->remove)
28555 {
28556 arm_initialize_isa (opt_bits, opt->isa_bits);
28557
28558 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28559 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28560 floating-point instructions are disabled. So the following check
28561 restricts the printing of ".arch_extension mve" and
28562 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28563 this special behaviour because the feature bits "mve" and
28564 "mve_float" are not part of the "fpu bits", so they are not cleared
28565 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28566 TARGET_HAVE_MVE_FLOAT are disabled. */
28567 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28568 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28569 && !TARGET_HAVE_MVE_FLOAT))
28570 continue;
28571
28572 /* If every feature bit of this option is set in the target ISA
28573 specification, print out the option name. However, don't print
28574 anything if all the bits are part of the FPU specification. */
28575 if (bitmap_subset_p (opt_bits, build_target.isa)
28576 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28577 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28578 }
28579 }
28580 }
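/* Example output (added commentary): for -march=armv7ve the special case
   above produces

       .arch armv7-a
       .arch_extension virt
       .arch_extension idiv
       .arch_extension sec
       .arch_extension mp
       .fpu <fpu-name or softvfp>

   For any other architecture a single ".arch <name>" is emitted, followed
   by ".fpu" and then ".arch_extension" lines for each extension whose
   feature bits are present in the target ISA and are not purely FPU
   bits.  */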
28581
28582 static void
28583 arm_file_start (void)
28584 {
28585 int val;
28586 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28587 bool bti = (aarch_enable_bti == 1);
28588
28589 arm_print_asm_arch_directives
28590 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28591
28592 if (TARGET_BPABI)
28593 {
28594 /* If we have a named cpu, but the assembler does not support that
28595 name via .cpu, put out a cpu name attribute; but don't do this if the
28596 name starts with the fictitious prefix, 'generic'. */
28597 if (arm_active_target.core_name
28598 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28599 && !startswith (arm_active_target.core_name, "generic"))
28600 {
28601 const char* truncated_name
28602 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28603 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28604 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28605 truncated_name);
28606 }
28607
28608 if (print_tune_info)
28609 arm_print_tune_info ();
28610
28611 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28612 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28613
28614 if (TARGET_HARD_FLOAT_ABI)
28615 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28616
28617 /* Some of these attributes only apply when the corresponding features
28618 are used. However we don't have any easy way of figuring this out.
28619 Conservatively record the setting that would have been used. */
28620
28621 if (flag_rounding_math)
28622 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28623
28624 if (!flag_unsafe_math_optimizations)
28625 {
28626 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28627 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28628 }
28629 if (flag_signaling_nans)
28630 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28631
28632 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28633 flag_finite_math_only ? 1 : 3);
28634
28635 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28636 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28637 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28638 flag_short_enums ? 1 : 2);
28639
28640 /* Tag_ABI_optimization_goals. */
28641 if (optimize_size)
28642 val = 4;
28643 else if (optimize >= 2)
28644 val = 2;
28645 else if (optimize)
28646 val = 1;
28647 else
28648 val = 6;
28649 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28650
28651 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28652 unaligned_access);
28653
28654 if (arm_fp16_format)
28655 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28656 (int) arm_fp16_format);
28657
28658 if (TARGET_HAVE_PACBTI)
28659 {
28660 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28661 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28662 }
28663 else if (pac || bti)
28664 {
28665 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28666 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28667 }
28668
28669 if (bti)
28670 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28671 if (pac)
28672 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28673
28674 if (arm_lang_output_object_attributes_hook)
28675 arm_lang_output_object_attributes_hook();
28676 }
28677
28678 default_file_start ();
28679 }
28680
28681 static void
28682 arm_file_end (void)
28683 {
28684 int regno;
28685
28686 /* Just in case the last function output in the assembler had non-default
28687 architecture directives, we force the assembler state back to the default
28688 set, so that any 'calculated' build attributes are based on the default
28689 options rather than the special options for that function. */
28690 arm_print_asm_arch_directives
28691 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28692
28693 if (NEED_INDICATE_EXEC_STACK)
28694 /* Add .note.GNU-stack. */
28695 file_end_indicate_exec_stack ();
28696
28697 if (! thumb_call_reg_needed)
28698 return;
28699
28700 switch_to_section (text_section);
28701 asm_fprintf (asm_out_file, "\t.code 16\n");
28702 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28703
28704 for (regno = 0; regno < LR_REGNUM; regno++)
28705 {
28706 rtx label = thumb_call_via_label[regno];
28707
28708 if (label != 0)
28709 {
28710 targetm.asm_out.internal_label (asm_out_file, "L",
28711 CODE_LABEL_NUMBER (label));
28712 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28713 }
28714 }
28715 }
28716
28717 #ifndef ARM_PE
28718 /* Symbols in the text segment can be accessed without indirecting via the
28719 constant pool; it may take an extra binary operation, but this is still
28720 faster than indirecting via memory. Don't do this when not optimizing,
28721 since we won't be calculating all of the offsets necessary to do this
28722 simplification. */
28723
28724 static void
28725 arm_encode_section_info (tree decl, rtx rtl, int first)
28726 {
28727 if (optimize > 0 && TREE_CONSTANT (decl))
28728 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28729
28730 default_encode_section_info (decl, rtl, first);
28731 }
28732 #endif /* !ARM_PE */
28733
28734 static void
28735 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28736 {
28737 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28738 && !strcmp (prefix, "L"))
28739 {
28740 arm_ccfsm_state = 0;
28741 arm_target_insn = NULL;
28742 }
28743 default_internal_label (stream, prefix, labelno);
28744 }
28745
28746 /* Define classes to generate code as RTL or output asm to a file.
28747 Using templates then allows us to use the same code to output code
28748 sequences in the two formats. */
28749 class thumb1_const_rtl
28750 {
28751 public:
28752 thumb1_const_rtl (rtx dst) : dst (dst) {}
28753
28754 void mov (HOST_WIDE_INT val)
28755 {
28756 emit_set_insn (dst, GEN_INT (val));
28757 }
28758
28759 void add (HOST_WIDE_INT val)
28760 {
28761 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28762 }
28763
28764 void ashift (HOST_WIDE_INT shift)
28765 {
28766 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28767 }
28768
28769 void neg ()
28770 {
28771 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28772 }
28773
28774 private:
28775 rtx dst;
28776 };
28777
28778 class thumb1_const_print
28779 {
28780 public:
28781 thumb1_const_print (FILE *f, int regno)
28782 {
28783 t_file = f;
28784 dst_regname = reg_names[regno];
28785 }
28786
28787 void mov (HOST_WIDE_INT val)
28788 {
28789 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28790 dst_regname, val);
28791 }
28792
28793 void add (HOST_WIDE_INT val)
28794 {
28795 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28796 dst_regname, val);
28797 }
28798
28799 void ashift (HOST_WIDE_INT shift)
28800 {
28801 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28802 dst_regname, shift);
28803 }
28804
28805 void neg ()
28806 {
28807 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28808 }
28809
28810 private:
28811 FILE *t_file;
28812 const char *dst_regname;
28813 };
28814
28815 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28816 Avoid generating useless code when one of the bytes is zero. */
28817 template <class T>
28818 void
28819 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28820 {
28821 bool mov_done_p = false;
28822 unsigned HOST_WIDE_INT val = op1;
28823 int shift = 0;
28824 int i;
28825
28826 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28827
28828 if (val <= 255)
28829 {
28830 dst.mov (val);
28831 return;
28832 }
28833
28834 /* For negative numbers with the first nine bits set, build the
28835 opposite of OP1, then negate it; the result is generally shorter
28836 and never longer. */
28837 if ((val & 0xFF800000) == 0xFF800000)
28838 {
28839 thumb1_gen_const_int_1 (dst, -op1);
28840 dst.neg ();
28841 return;
28842 }
28843
28844 /* In the general case, we need 7 instructions to build
28845 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28846 do better if VAL is small enough, or
28847 right-shiftable by a suitable amount. If the
28848 right-shift enables us to encode at least one byte fewer,
28849 it's worth it: we save an adds and an lsls at the
28850 expense of a final lsls. */
28851 int final_shift = number_of_first_bit_set (val);
28852
28853 int leading_zeroes = clz_hwi (val);
28854 int number_of_bytes_needed
28855 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28856 / BITS_PER_UNIT) + 1;
28857 int number_of_bytes_needed2
28858 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28859 / BITS_PER_UNIT) + 1;
28860
28861 if (number_of_bytes_needed2 < number_of_bytes_needed)
28862 val >>= final_shift;
28863 else
28864 final_shift = 0;
28865
28866 /* If we are in a very small range, we can use either a single movs
28867 or movs+adds. */
28868 if (val <= 510)
28869 {
28870 if (val > 255)
28871 {
28872 unsigned HOST_WIDE_INT high = val - 255;
28873
28874 dst.mov (high);
28875 dst.add (255);
28876 }
28877 else
28878 dst.mov (val);
28879
28880 if (final_shift > 0)
28881 dst.ashift (final_shift);
28882 }
28883 else
28884 {
28885 /* General case, emit upper 3 bytes as needed. */
28886 for (i = 0; i < 3; i++)
28887 {
28888 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28889
28890 if (byte)
28891 {
28892 /* We are about to emit new bits, stop accumulating a
28893 shift amount, and left-shift only if we have already
28894 emitted some upper bits. */
28895 if (mov_done_p)
28896 {
28897 dst.ashift (shift);
28898 dst.add (byte);
28899 }
28900 else
28901 dst.mov (byte);
28902
28903 /* Stop accumulating shift amount since we've just
28904 emitted some bits. */
28905 shift = 0;
28906
28907 mov_done_p = true;
28908 }
28909
28910 if (mov_done_p)
28911 shift += 8;
28912 }
28913
28914 /* Emit lower byte. */
28915 if (!mov_done_p)
28916 dst.mov (val & 0xff);
28917 else
28918 {
28919 dst.ashift (shift);
28920 if (val & 0xff)
28921 dst.add (val & 0xff);
28922 }
28923
28924 if (final_shift > 0)
28925 dst.ashift (final_shift);
28926 }
28927 }
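/* Worked examples (added commentary), shown in the print form for r3.

   0x12345678 takes the full 7-instruction general case:
       movs r3, #18     @ 0x12
       lsls r3, #8
       adds r3, #52     @ 0x34
       lsls r3, #8
       adds r3, #86     @ 0x56
       lsls r3, #8
       adds r3, #120    @ 0x78

   0x44000 benefits from the final right-shift trick: the value is first
   shifted right by 14, leaving 17, so we emit
       movs r3, #17
       lsls r3, #14
   instead of the four instructions the byte-by-byte general case would
   need.  */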
28928
28929 /* Proxies for thumb1.md, since the thumb1_const_print and
28930 thumb1_const_rtl classes are not exported. */
28931 void
28932 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28933 {
28934 thumb1_const_rtl t (dst);
28935 thumb1_gen_const_int_1 (t, op1);
28936 }
28937
28938 void
28939 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28940 {
28941 thumb1_const_print t (asm_out_file, REGNO (dst));
28942 thumb1_gen_const_int_1 (t, op1);
28943 }
28944
28945 /* Output code to add DELTA to the first argument, and then jump
28946 to FUNCTION. Used for C++ multiple inheritance. */
28947
28948 static void
28949 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28950 HOST_WIDE_INT, tree function)
28951 {
28952 static int thunk_label = 0;
28953 char label[256];
28954 char labelpc[256];
28955 int mi_delta = delta;
28956 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28957 int shift = 0;
28958 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28959 ? 1 : 0);
28960 if (mi_delta < 0)
28961 mi_delta = - mi_delta;
28962
28963 final_start_function (emit_barrier (), file, 1);
28964
28965 if (TARGET_THUMB1)
28966 {
28967 int labelno = thunk_label++;
28968 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28969 /* Thunks are entered in arm mode when available. */
28970 if (TARGET_THUMB1_ONLY)
28971 {
28972 /* push r3 so we can use it as a temporary. */
28973 /* TODO: Omit this save if r3 is not used. */
28974 fputs ("\tpush {r3}\n", file);
28975
28976 /* With -mpure-code, we cannot load the address from the
28977 constant pool: we build it explicitly. */
28978 if (target_pure_code)
28979 {
28980 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28981 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28982 fputc ('\n', file);
28983 fputs ("\tlsls r3, #8\n", file);
28984 fputs ("\tadds\tr3, #:upper0_7:#", file);
28985 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28986 fputc ('\n', file);
28987 fputs ("\tlsls r3, #8\n", file);
28988 fputs ("\tadds\tr3, #:lower8_15:#", file);
28989 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28990 fputc ('\n', file);
28991 fputs ("\tlsls r3, #8\n", file);
28992 fputs ("\tadds\tr3, #:lower0_7:#", file);
28993 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28994 fputc ('\n', file);
28995 }
28996 else
28997 fputs ("\tldr\tr3, ", file);
28998 }
28999 else
29000 {
29001 fputs ("\tldr\tr12, ", file);
29002 }
29003
29004 if (!target_pure_code)
29005 {
29006 assemble_name (file, label);
29007 fputc ('\n', file);
29008 }
29009
29010 if (flag_pic)
29011 {
29012 /* If we are generating PIC, the ldr instruction below loads
29013 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29014 the address of the add + 8, so we have:
29015
29016 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29017 = target + 1.
29018
29019 Note that we have "+ 1" because some versions of GNU ld
29020 don't set the low bit of the result for R_ARM_REL32
29021 relocations against thumb function symbols.
29022 On ARMv6M this is +4, not +8. */
29023 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29024 assemble_name (file, labelpc);
29025 fputs (":\n", file);
29026 if (TARGET_THUMB1_ONLY)
29027 {
29028 /* This is 2 insns after the start of the thunk, so we know it
29029 is 4-byte aligned. */
29030 fputs ("\tadd\tr3, pc, r3\n", file);
29031 fputs ("\tmov r12, r3\n", file);
29032 }
29033 else
29034 fputs ("\tadd\tr12, pc, r12\n", file);
29035 }
29036 else if (TARGET_THUMB1_ONLY)
29037 fputs ("\tmov r12, r3\n", file);
29038 }
29039 if (TARGET_THUMB1_ONLY)
29040 {
29041 if (mi_delta > 255)
29042 {
29043 /* With -mpure-code, we cannot load MI_DELTA from the
29044 constant pool: we build it explicitly. */
29045 if (target_pure_code)
29046 {
29047 thumb1_const_print r3 (file, 3);
29048 thumb1_gen_const_int_1 (r3, mi_delta);
29049 }
29050 else
29051 {
29052 fputs ("\tldr\tr3, ", file);
29053 assemble_name (file, label);
29054 fputs ("+4\n", file);
29055 }
29056 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29057 mi_op, this_regno, this_regno);
29058 }
29059 else if (mi_delta != 0)
29060 {
29061 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
29062 when one of the operands is an immediate. */
29063 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29064 mi_op, this_regno, this_regno,
29065 mi_delta);
29066 }
29067 }
29068 else
29069 {
29070 /* TODO: Use movw/movt for large constants when available. */
29071 while (mi_delta != 0)
29072 {
29073 if ((mi_delta & (3 << shift)) == 0)
29074 shift += 2;
29075 else
29076 {
29077 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29078 mi_op, this_regno, this_regno,
29079 mi_delta & (0xff << shift));
29080 mi_delta &= ~(0xff << shift);
29081 shift += 8;
29082 }
29083 }
29084 }
29085 if (TARGET_THUMB1)
29086 {
29087 if (TARGET_THUMB1_ONLY)
29088 fputs ("\tpop\t{r3}\n", file);
29089
29090 fprintf (file, "\tbx\tr12\n");
29091
29092 /* With -mpure-code, we don't need to emit literals for the
29093 function address and delta since we emitted code to build
29094 them. */
29095 if (!target_pure_code)
29096 {
29097 ASM_OUTPUT_ALIGN (file, 2);
29098 assemble_name (file, label);
29099 fputs (":\n", file);
29100 if (flag_pic)
29101 {
29102 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29103 rtx tem = XEXP (DECL_RTL (function), 0);
29104 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29105 pipeline offset is four rather than eight. Adjust the offset
29106 accordingly. */
29107 tem = plus_constant (GET_MODE (tem), tem,
29108 TARGET_THUMB1_ONLY ? -3 : -7);
29109 tem = gen_rtx_MINUS (GET_MODE (tem),
29110 tem,
29111 gen_rtx_SYMBOL_REF (Pmode,
29112 ggc_strdup (labelpc)));
29113 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29114 }
29115 else
29116 /* Output ".word .LTHUNKn". */
29117 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29118
29119 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29120 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29121 }
29122 }
29123 else
29124 {
29125 fputs ("\tb\t", file);
29126 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29127 if (NEED_PLT_RELOC)
29128 fputs ("(PLT)", file);
29129 fputc ('\n', file);
29130 }
29131
29132 final_end_function ();
29133 }
29134
29135 /* MI thunk handling for TARGET_32BIT. */
29136
29137 static void
29138 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29139 HOST_WIDE_INT vcall_offset, tree function)
29140 {
29141 const bool long_call_p = arm_is_long_call_p (function);
29142
29143 /* On ARM, this_regno is R0 or R1 depending on
29144 whether the function returns an aggregate or not.
29145 */
29146 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29147 function)
29148 ? R1_REGNUM : R0_REGNUM);
29149
29150 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29151 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29152 reload_completed = 1;
29153 emit_note (NOTE_INSN_PROLOGUE_END);
29154
29155 /* Add DELTA to THIS_RTX. */
29156 if (delta != 0)
29157 arm_split_constant (PLUS, Pmode, NULL_RTX,
29158 delta, this_rtx, this_rtx, false);
29159
29160 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29161 if (vcall_offset != 0)
29162 {
29163 /* Load *THIS_RTX. */
29164 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29165 /* Compute *THIS_RTX + VCALL_OFFSET. */
29166 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29167 false);
29168 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29169 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29170 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29171 }
29172
29173 /* Generate a tail call to the target function. */
29174 if (!TREE_USED (function))
29175 {
29176 assemble_external (function);
29177 TREE_USED (function) = 1;
29178 }
29179 rtx funexp = XEXP (DECL_RTL (function), 0);
29180 if (long_call_p)
29181 {
29182 emit_move_insn (temp, funexp);
29183 funexp = temp;
29184 }
29185 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29186 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29187 SIBLING_CALL_P (insn) = 1;
29188 emit_barrier ();
29189
29190 /* Indirect calls require a bit of fixup in PIC mode. */
29191 if (long_call_p)
29192 {
29193 split_all_insns_noflow ();
29194 arm_reorg ();
29195 }
29196
29197 insn = get_insns ();
29198 shorten_branches (insn);
29199 final_start_function (insn, file, 1);
29200 final (insn, file, 1);
29201 final_end_function ();
29202
29203 /* Stop pretending this is a post-reload pass. */
29204 reload_completed = 0;
29205 }
29206
29207 /* Output code to add DELTA to the first argument, and then jump
29208 to FUNCTION. Used for C++ multiple inheritance. */
29209
29210 static void
29211 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29212 HOST_WIDE_INT vcall_offset, tree function)
29213 {
29214 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29215
29216 assemble_start_function (thunk, fnname);
29217 if (TARGET_32BIT)
29218 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29219 else
29220 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29221 assemble_end_function (thunk, fnname);
29222 }
29223
29224 int
29225 arm_emit_vector_const (FILE *file, rtx x)
29226 {
29227 int i;
29228 const char * pattern;
29229
29230 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29231
29232 switch (GET_MODE (x))
29233 {
29234 case E_V2SImode: pattern = "%08x"; break;
29235 case E_V4HImode: pattern = "%04x"; break;
29236 case E_V8QImode: pattern = "%02x"; break;
29237 default: gcc_unreachable ();
29238 }
29239
29240 fprintf (file, "0x");
29241 for (i = CONST_VECTOR_NUNITS (x); i--;)
29242 {
29243 rtx element;
29244
29245 element = CONST_VECTOR_ELT (x, i);
29246 fprintf (file, pattern, INTVAL (element));
29247 }
29248
29249 return 1;
29250 }
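/* Example (added commentary): a V4HImode CONST_VECTOR {1, 2, 3, 4} is
   printed highest element first with the "%04x" pattern, giving
   "0x0004000300020001".  */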
29251
29252 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29253 HFmode constant pool entries are actually loaded with ldr. */
29254 void
29255 arm_emit_fp16_const (rtx c)
29256 {
29257 long bits;
29258
29259 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29260 if (WORDS_BIG_ENDIAN)
29261 assemble_zeros (2);
29262 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29263 if (!WORDS_BIG_ENDIAN)
29264 assemble_zeros (2);
29265 }
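/* Example (added commentary): the HFmode constant 1.0 has the encoding
   0x3c00, so on a little-endian target this emits the half-word 0x3c00
   followed by two bytes of zero padding; on a WORDS_BIG_ENDIAN target the
   padding is emitted first.  */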
29266
29267 const char *
29268 arm_output_load_gr (rtx *operands)
29269 {
29270 rtx reg;
29271 rtx offset;
29272 rtx wcgr;
29273 rtx sum;
29274
29275 if (!MEM_P (operands [1])
29276 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29277 || !REG_P (reg = XEXP (sum, 0))
29278 || !CONST_INT_P (offset = XEXP (sum, 1))
29279 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29280 return "wldrw%?\t%0, %1";
29281
29282 /* Fix up an out-of-range load of a GR register. */
29283 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29284 wcgr = operands[0];
29285 operands[0] = reg;
29286 output_asm_insn ("ldr%?\t%0, %1", operands);
29287
29288 operands[0] = wcgr;
29289 operands[1] = reg;
29290 output_asm_insn ("tmcr%?\t%0, %1", operands);
29291 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29292
29293 return "";
29294 }
29295
29296 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29297
29298 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29299 named arg and all anonymous args onto the stack.
29300 XXX I know the prologue shouldn't be pushing registers, but it is faster
29301 that way. */
29302
29303 static void
29304 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29305 const function_arg_info &arg,
29306 int *pretend_size,
29307 int second_time ATTRIBUTE_UNUSED)
29308 {
29309 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29310 int nregs;
29311
29312 cfun->machine->uses_anonymous_args = 1;
29313 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29314 {
29315 nregs = pcum->aapcs_ncrn;
29316 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29317 && (nregs & 1))
29318 {
29319 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29320 if (res < 0 && warn_psabi)
29321 inform (input_location, "parameter passing for argument of "
29322 "type %qT changed in GCC 7.1", arg.type);
29323 else if (res > 0)
29324 {
29325 nregs++;
29326 if (res > 1 && warn_psabi)
29327 inform (input_location,
29328 "parameter passing for argument of type "
29329 "%qT changed in GCC 9.1", arg.type);
29330 }
29331 }
29332 }
29333 else
29334 nregs = pcum->nregs;
29335
29336 if (nregs < NUM_ARG_REGS)
29337 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29338 }
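/* Example (added commentary): for an AAPCS variadic function such as
   "int f (int fmt, ...)", a single core register is used by the named
   argument, so nregs is 1 and *pretend_size becomes
   (NUM_ARG_REGS - 1) * UNITS_PER_WORD = 12 with the usual four argument
   registers, causing the prologue to push r1-r3 next to the stacked
   anonymous arguments.  */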
29339
29340 /* We can't rely on the caller doing the proper promotion when
29341 using APCS or ATPCS. */
29342
29343 static bool
29344 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29345 {
29346 return !TARGET_AAPCS_BASED;
29347 }
29348
29349 static machine_mode
29350 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29351 machine_mode mode,
29352 int *punsignedp ATTRIBUTE_UNUSED,
29353 const_tree fntype ATTRIBUTE_UNUSED,
29354 int for_return ATTRIBUTE_UNUSED)
29355 {
29356 if (GET_MODE_CLASS (mode) == MODE_INT
29357 && GET_MODE_SIZE (mode) < 4)
29358 return SImode;
29359
29360 return mode;
29361 }
29362
29363
29364 static bool
29365 arm_default_short_enums (void)
29366 {
29367 return ARM_DEFAULT_SHORT_ENUMS;
29368 }
29369
29370
29371 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29372
29373 static bool
29374 arm_align_anon_bitfield (void)
29375 {
29376 return TARGET_AAPCS_BASED;
29377 }
29378
29379
29380 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29381
29382 static tree
29383 arm_cxx_guard_type (void)
29384 {
29385 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29386 }
29387
29388
29389 /* The EABI says test the least significant bit of a guard variable. */
29390
29391 static bool
29392 arm_cxx_guard_mask_bit (void)
29393 {
29394 return TARGET_AAPCS_BASED;
29395 }
29396
29397
29398 /* The EABI specifies that all array cookies are 8 bytes long. */
29399
29400 static tree
29401 arm_get_cookie_size (tree type)
29402 {
29403 tree size;
29404
29405 if (!TARGET_AAPCS_BASED)
29406 return default_cxx_get_cookie_size (type);
29407
29408 size = build_int_cst (sizetype, 8);
29409 return size;
29410 }
29411
29412
29413 /* The EABI says that array cookies should also contain the element size. */
29414
29415 static bool
29416 arm_cookie_has_size (void)
29417 {
29418 return TARGET_AAPCS_BASED;
29419 }
29420
29421
29422 /* The EABI says constructors and destructors should return a pointer to
29423 the object constructed/destroyed. */
29424
29425 static bool
29426 arm_cxx_cdtor_returns_this (void)
29427 {
29428 return TARGET_AAPCS_BASED;
29429 }
29430
29431 /* The EABI says that an inline function may never be the key
29432 method. */
29433
29434 static bool
29435 arm_cxx_key_method_may_be_inline (void)
29436 {
29437 return !TARGET_AAPCS_BASED;
29438 }
29439
29440 static void
29441 arm_cxx_determine_class_data_visibility (tree decl)
29442 {
29443 if (!TARGET_AAPCS_BASED
29444 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29445 return;
29446
29447 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29448 is exported. However, on systems without dynamic vague linkage,
29449 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29450 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29451 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29452 else
29453 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29454 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29455 }
29456
29457 static bool
29458 arm_cxx_class_data_always_comdat (void)
29459 {
29460 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29461 vague linkage if the class has no key function. */
29462 return !TARGET_AAPCS_BASED;
29463 }
29464
29465
29466 /* The EABI says __aeabi_atexit should be used to register static
29467 destructors. */
29468
29469 static bool
29470 arm_cxx_use_aeabi_atexit (void)
29471 {
29472 return TARGET_AAPCS_BASED;
29473 }
29474
29475
29476 void
29477 arm_set_return_address (rtx source, rtx scratch)
29478 {
29479 arm_stack_offsets *offsets;
29480 HOST_WIDE_INT delta;
29481 rtx addr, mem;
29482 unsigned long saved_regs;
29483
29484 offsets = arm_get_frame_offsets ();
29485 saved_regs = offsets->saved_regs_mask;
29486
29487 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29488 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29489 else
29490 {
29491 if (frame_pointer_needed)
29492 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29493 else
29494 {
29495 /* LR will be the first saved register. */
29496 delta = offsets->outgoing_args - (offsets->frame + 4);
29497
29498
29499 if (delta >= 4096)
29500 {
29501 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29502 GEN_INT (delta & ~4095)));
29503 addr = scratch;
29504 delta &= 4095;
29505 }
29506 else
29507 addr = stack_pointer_rtx;
29508
29509 addr = plus_constant (Pmode, addr, delta);
29510 }
29511
29512 /* The store needs to be marked to prevent DSE from deleting
29513 it as dead if it is based on fp. */
29514 mem = gen_frame_mem (Pmode, addr);
29515 MEM_VOLATILE_P (mem) = true;
29516 emit_move_insn (mem, source);
29517 }
29518 }
29519
29520
29521 void
29522 thumb_set_return_address (rtx source, rtx scratch)
29523 {
29524 arm_stack_offsets *offsets;
29525 HOST_WIDE_INT delta;
29526 HOST_WIDE_INT limit;
29527 int reg;
29528 rtx addr, mem;
29529 unsigned long mask;
29530
29531 emit_use (source);
29532
29533 offsets = arm_get_frame_offsets ();
29534 mask = offsets->saved_regs_mask;
29535 if (mask & (1 << LR_REGNUM))
29536 {
29537 limit = 1024;
29538 /* Find the saved regs. */
29539 if (frame_pointer_needed)
29540 {
29541 delta = offsets->soft_frame - offsets->saved_args;
29542 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29543 if (TARGET_THUMB1)
29544 limit = 128;
29545 }
29546 else
29547 {
29548 delta = offsets->outgoing_args - offsets->saved_args;
29549 reg = SP_REGNUM;
29550 }
29551 /* Allow for the stack frame. */
29552 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29553 delta -= 16;
29554 /* The link register is always the first saved register. */
29555 delta -= 4;
29556
29557 /* Construct the address. */
29558 addr = gen_rtx_REG (SImode, reg);
29559 if (delta > limit)
29560 {
29561 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29562 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29563 addr = scratch;
29564 }
29565 else
29566 addr = plus_constant (Pmode, addr, delta);
29567
29568 /* The store needs to be marked to prevent DSE from deleting
29569 it as dead if it is based on fp. */
29570 mem = gen_frame_mem (Pmode, addr);
29571 MEM_VOLATILE_P (mem) = true;
29572 emit_move_insn (mem, source);
29573 }
29574 else
29575 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29576 }
29577
29578 /* Implements target hook vector_mode_supported_p. */
29579 bool
29580 arm_vector_mode_supported_p (machine_mode mode)
29581 {
29582 /* Neon also supports V2SImode, etc. listed in the clause below. */
29583 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29584 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29585 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29586 || mode == V8BFmode))
29587 return true;
29588
29589 if ((TARGET_NEON || TARGET_IWMMXT)
29590 && ((mode == V2SImode)
29591 || (mode == V4HImode)
29592 || (mode == V8QImode)))
29593 return true;
29594
29595 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29596 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29597 || mode == V2HAmode))
29598 return true;
29599
29600 if (TARGET_HAVE_MVE
29601 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29602 return true;
29603
29604 if (TARGET_HAVE_MVE_FLOAT
29605 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29606 return true;
29607
29608 return false;
29609 }
29610
29611 /* Implements target hook array_mode_supported_p. */
29612
29613 static bool
29614 arm_array_mode_supported_p (machine_mode mode,
29615 unsigned HOST_WIDE_INT nelems)
29616 {
29617 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29618 for now, as the lane-swapping logic needs to be extended in the expanders.
29619 See PR target/82518. */
29620 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29621 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29622 && (nelems >= 2 && nelems <= 4))
29623 return true;
29624
29625 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29626 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29627 return true;
29628
29629 return false;
29630 }
29631
29632 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29633 registers when autovectorizing for Neon, at least until multiple vector
29634 widths are supported properly by the middle-end. */
29635
29636 static machine_mode
29637 arm_preferred_simd_mode (scalar_mode mode)
29638 {
29639 if (TARGET_NEON)
29640 switch (mode)
29641 {
29642 case E_HFmode:
29643 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29644 case E_SFmode:
29645 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29646 case E_SImode:
29647 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29648 case E_HImode:
29649 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29650 case E_QImode:
29651 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29652 case E_DImode:
29653 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29654 return V2DImode;
29655 break;
29656
29657 default:;
29658 }
29659
29660 if (TARGET_REALLY_IWMMXT)
29661 switch (mode)
29662 {
29663 case E_SImode:
29664 return V2SImode;
29665 case E_HImode:
29666 return V4HImode;
29667 case E_QImode:
29668 return V8QImode;
29669
29670 default:;
29671 }
29672
29673 if (TARGET_HAVE_MVE)
29674 switch (mode)
29675 {
29676 case E_QImode:
29677 return V16QImode;
29678 case E_HImode:
29679 return V8HImode;
29680 case E_SImode:
29681 return V4SImode;
29682
29683 default:;
29684 }
29685
29686 if (TARGET_HAVE_MVE_FLOAT)
29687 switch (mode)
29688 {
29689 case E_HFmode:
29690 return V8HFmode;
29691 case E_SFmode:
29692 return V4SFmode;
29693
29694 default:;
29695 }
29696
29697 return word_mode;
29698 }
29699
29700 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29701
29702 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29703 using r0-r4 for function arguments, r7 for the stack frame, and not have
29704 enough left over to do doubleword arithmetic. For Thumb-2 all the
29705 potentially problematic instructions accept high registers so this is not
29706 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29707 that require many low registers. */
29708 static bool
29709 arm_class_likely_spilled_p (reg_class_t rclass)
29710 {
29711 if ((TARGET_THUMB1 && rclass == LO_REGS)
29712 || rclass == CC_REG)
29713 return true;
29714
29715 return default_class_likely_spilled_p (rclass);
29716 }
29717
29718 /* Implements target hook small_register_classes_for_mode_p. */
29719 bool
29720 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29721 {
29722 return TARGET_THUMB1;
29723 }
29724
29725 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29726 ARM insns and therefore guarantee that the shift count is modulo 256.
29727 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29728 guarantee no particular behavior for out-of-range counts. */
29729
29730 static unsigned HOST_WIDE_INT
29731 arm_shift_truncation_mask (machine_mode mode)
29732 {
29733 return mode == SImode ? 255 : 0;
29734 }
29735
29736
29737 /* Map internal gcc register numbers to DWARF2 register numbers. */
29738
29739 unsigned int
29740 arm_debugger_regno (unsigned int regno)
29741 {
29742 if (regno < 16)
29743 return regno;
29744
29745 if (IS_VFP_REGNUM (regno))
29746 {
29747 /* See comment in arm_dwarf_register_span. */
29748 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29749 return 64 + regno - FIRST_VFP_REGNUM;
29750 else
29751 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29752 }
29753
29754 if (IS_IWMMXT_GR_REGNUM (regno))
29755 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29756
29757 if (IS_IWMMXT_REGNUM (regno))
29758 return 112 + regno - FIRST_IWMMXT_REGNUM;
29759
29760 if (IS_PAC_REGNUM (regno))
29761 return DWARF_PAC_REGNUM;
29762
29763 return DWARF_FRAME_REGISTERS;
29764 }
29765
29766 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29767 GCC models them as 64 32-bit registers, so we need to describe this to
29768 the DWARF generation code. Other registers can use the default. */
29769 static rtx
29770 arm_dwarf_register_span (rtx rtl)
29771 {
29772 machine_mode mode;
29773 unsigned regno;
29774 rtx parts[16];
29775 int nregs;
29776 int i;
29777
29778 regno = REGNO (rtl);
29779 if (!IS_VFP_REGNUM (regno))
29780 return NULL_RTX;
29781
29782 /* XXX FIXME: The EABI defines two VFP register ranges:
29783 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29784 256-287: D0-D31
29785 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29786 corresponding D register. Until GDB supports this, we shall use the
29787 legacy encodings. We also use these encodings for D0-D15 for
29788 compatibility with older debuggers. */
29789 mode = GET_MODE (rtl);
29790 if (GET_MODE_SIZE (mode) < 8)
29791 return NULL_RTX;
29792
29793 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29794 {
29795 nregs = GET_MODE_SIZE (mode) / 4;
29796 for (i = 0; i < nregs; i += 2)
29797 if (TARGET_BIG_END)
29798 {
29799 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29800 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29801 }
29802 else
29803 {
29804 parts[i] = gen_rtx_REG (SImode, regno + i);
29805 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29806 }
29807 }
29808 else
29809 {
29810 nregs = GET_MODE_SIZE (mode) / 8;
29811 for (i = 0; i < nregs; i++)
29812 parts[i] = gen_rtx_REG (DImode, regno + i);
29813 }
29814
29815 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29816 }
29817
29818 #if ARM_UNWIND_INFO
29819 /* Emit unwind directives for a store-multiple instruction or stack pointer
29820 push during alignment.
29821 These should only ever be generated by the function prologue code, so
29822 expect them to have a particular form.
29823 The store-multiple instruction sometimes pushes pc as the last register,
29824 although it should not be tracked in the unwind information, or for -Os
29825 sometimes pushes some dummy registers before the first register that needs
29826 to be tracked in the unwind information; such dummy registers are there just
29827 to avoid a separate stack adjustment, and will not be restored in the
29828 epilogue. */
29829
29830 static void
29831 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29832 {
29833 int i;
29834 HOST_WIDE_INT offset;
29835 HOST_WIDE_INT nregs;
29836 int reg_size;
29837 unsigned reg;
29838 unsigned lastreg;
29839 unsigned padfirst = 0, padlast = 0;
29840 rtx e;
29841
29842 e = XVECEXP (p, 0, 0);
29843 gcc_assert (GET_CODE (e) == SET);
29844
29845 /* First insn will adjust the stack pointer. */
29846 gcc_assert (GET_CODE (e) == SET
29847 && REG_P (SET_DEST (e))
29848 && REGNO (SET_DEST (e)) == SP_REGNUM
29849 && GET_CODE (SET_SRC (e)) == PLUS);
29850
29851 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29852 nregs = XVECLEN (p, 0) - 1;
29853 gcc_assert (nregs);
29854
29855 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29856 if (reg < 16 || IS_PAC_REGNUM (reg))
29857 {
29858 /* For -Os dummy registers can be pushed at the beginning to
29859 avoid separate stack pointer adjustment. */
29860 e = XVECEXP (p, 0, 1);
29861 e = XEXP (SET_DEST (e), 0);
29862 if (GET_CODE (e) == PLUS)
29863 padfirst = INTVAL (XEXP (e, 1));
29864 gcc_assert (padfirst == 0 || optimize_size);
29865 /* The function prologue may also push pc, but not annotate it as it is
29866 never restored. We turn this into a stack pointer adjustment. */
29867 e = XVECEXP (p, 0, nregs);
29868 e = XEXP (SET_DEST (e), 0);
29869 if (GET_CODE (e) == PLUS)
29870 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29871 else
29872 padlast = offset - 4;
29873 gcc_assert (padlast == 0 || padlast == 4);
29874 if (padlast == 4)
29875 fprintf (out_file, "\t.pad #4\n");
29876 reg_size = 4;
29877 fprintf (out_file, "\t.save {");
29878 }
29879 else if (IS_VFP_REGNUM (reg))
29880 {
29881 reg_size = 8;
29882 fprintf (out_file, "\t.vsave {");
29883 }
29884 else
29885 /* Unknown register type. */
29886 gcc_unreachable ();
29887
29888 /* If the stack increment doesn't match the size of the saved registers,
29889 something has gone horribly wrong. */
29890 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29891
29892 offset = padfirst;
29893 lastreg = 0;
29894 /* The remaining insns will describe the stores. */
29895 for (i = 1; i <= nregs; i++)
29896 {
29897 /* Expect (set (mem <addr>) (reg)).
29898 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29899 e = XVECEXP (p, 0, i);
29900 gcc_assert (GET_CODE (e) == SET
29901 && MEM_P (SET_DEST (e))
29902 && REG_P (SET_SRC (e)));
29903
29904 reg = REGNO (SET_SRC (e));
29905 gcc_assert (reg >= lastreg);
29906
29907 if (i != 1)
29908 fprintf (out_file, ", ");
29909 /* We can't use %r for vfp because we need to use the
29910 double precision register names. */
29911 if (IS_VFP_REGNUM (reg))
29912 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29913 else if (IS_PAC_REGNUM (reg))
29914 asm_fprintf (asm_out_file, "ra_auth_code");
29915 else
29916 asm_fprintf (out_file, "%r", reg);
29917
29918 if (flag_checking)
29919 {
29920 /* Check that the addresses are consecutive. */
29921 e = XEXP (SET_DEST (e), 0);
29922 if (GET_CODE (e) == PLUS)
29923 gcc_assert (REG_P (XEXP (e, 0))
29924 && REGNO (XEXP (e, 0)) == SP_REGNUM
29925 && CONST_INT_P (XEXP (e, 1))
29926 && offset == INTVAL (XEXP (e, 1)));
29927 else
29928 gcc_assert (i == 1
29929 && REG_P (e)
29930 && REGNO (e) == SP_REGNUM);
29931 offset += reg_size;
29932 }
29933 }
29934 fprintf (out_file, "}\n");
29935 if (padfirst)
29936 fprintf (out_file, "\t.pad #%d\n", padfirst);
29937 }
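/* Example (added commentary): a prologue store-multiple such as
   "push {r4, r5, lr}" (a 12-byte stack decrement storing three core
   registers) is annotated as

       .save {r4, r5, lr}

   while a push that also includes pc as an unrestored dummy register gets
   an extra ".pad #4" printed before the ".save" directive.  */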
29938
29939 /* Emit unwind directives for a SET. */
29940
29941 static void
29942 arm_unwind_emit_set (FILE * out_file, rtx p)
29943 {
29944 rtx e0;
29945 rtx e1;
29946 unsigned reg;
29947
29948 e0 = XEXP (p, 0);
29949 e1 = XEXP (p, 1);
29950 switch (GET_CODE (e0))
29951 {
29952 case MEM:
29953 /* Pushing a single register. */
29954 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29955 || !REG_P (XEXP (XEXP (e0, 0), 0))
29956 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29957 abort ();
29958
29959 asm_fprintf (out_file, "\t.save ");
29960 if (IS_VFP_REGNUM (REGNO (e1)))
29961 asm_fprintf(out_file, "{d%d}\n",
29962 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29963 else
29964 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29965 break;
29966
29967 case REG:
29968 if (REGNO (e0) == SP_REGNUM)
29969 {
29970 /* A stack increment. */
29971 if (GET_CODE (e1) != PLUS
29972 || !REG_P (XEXP (e1, 0))
29973 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29974 || !CONST_INT_P (XEXP (e1, 1)))
29975 abort ();
29976
29977 asm_fprintf (out_file, "\t.pad #%wd\n",
29978 -INTVAL (XEXP (e1, 1)));
29979 }
29980 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29981 {
29982 HOST_WIDE_INT offset;
29983
29984 if (GET_CODE (e1) == PLUS)
29985 {
29986 if (!REG_P (XEXP (e1, 0))
29987 || !CONST_INT_P (XEXP (e1, 1)))
29988 abort ();
29989 reg = REGNO (XEXP (e1, 0));
29990 offset = INTVAL (XEXP (e1, 1));
29991 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29992 HARD_FRAME_POINTER_REGNUM, reg,
29993 offset);
29994 }
29995 else if (REG_P (e1))
29996 {
29997 reg = REGNO (e1);
29998 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29999 HARD_FRAME_POINTER_REGNUM, reg);
30000 }
30001 else
30002 abort ();
30003 }
30004 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30005 {
30006 /* Move from sp to reg. */
30007 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30008 }
30009 else if (GET_CODE (e1) == PLUS
30010 && REG_P (XEXP (e1, 0))
30011 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30012 && CONST_INT_P (XEXP (e1, 1)))
30013 {
30014 /* Set reg to offset from sp. */
30015 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30016 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30017 }
30018 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30019 {
30020 if (cfun->machine->pacspval_needed)
30021 asm_fprintf (out_file, "\t.pacspval\n");
30022 }
30023 else
30024 abort ();
30025 break;
30026
30027 default:
30028 abort ();
30029 }
30030 }
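/* Examples (added commentary) of the SET forms handled above:

     (set (mem (pre_dec sp)) (reg r4))    ->  .save {r4}
     (set sp (plus sp (const_int -16)))   ->  .pad #16
     (set fp (plus sp (const_int 8)))     ->  .setfp <fp>, sp, #8
     (set (reg rN) sp)                    ->  .movsp rN

   where <fp> is the hard frame pointer register for the current
   instruction set.  */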
30031
30032
30033 /* Emit unwind directives for the given insn. */
30034
30035 static void
30036 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30037 {
30038 rtx note, pat;
30039 bool handled_one = false;
30040
30041 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30042 return;
30043
30044 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30045 && (TREE_NOTHROW (current_function_decl)
30046 || crtl->all_throwers_are_sibcalls))
30047 return;
30048
30049 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30050 return;
30051
30052 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30053 {
30054 switch (REG_NOTE_KIND (note))
30055 {
30056 case REG_FRAME_RELATED_EXPR:
30057 pat = XEXP (note, 0);
30058 goto found;
30059
30060 case REG_CFA_REGISTER:
30061 pat = XEXP (note, 0);
30062 if (pat == NULL)
30063 {
30064 pat = PATTERN (insn);
30065 if (GET_CODE (pat) == PARALLEL)
30066 pat = XVECEXP (pat, 0, 0);
30067 }
30068
30069 /* Only emitted for IS_STACKALIGN re-alignment. */
30070 {
30071 rtx dest, src;
30072 unsigned reg;
30073
30074 src = SET_SRC (pat);
30075 dest = SET_DEST (pat);
30076
30077 gcc_assert (src == stack_pointer_rtx
30078 || IS_PAC_REGNUM (REGNO (src)));
30079 reg = REGNO (dest);
30080
30081 if (IS_PAC_REGNUM (REGNO (src)))
30082 arm_unwind_emit_set (out_file, PATTERN (insn));
30083 else
30084 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30085 reg + 0x90, reg);
30086 }
30087 handled_one = true;
30088 break;
30089
30090 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
30091 to get correct dwarf information for shrink-wrap. We should not
30092 emit unwind information for it because these are used either for
30093 pretend arguments or notes to adjust sp and restore registers from
30094 stack. */
30095 case REG_CFA_DEF_CFA:
30096 case REG_CFA_ADJUST_CFA:
30097 case REG_CFA_RESTORE:
30098 return;
30099
30100 case REG_CFA_EXPRESSION:
30101 case REG_CFA_OFFSET:
30102 /* ??? Only handling here what we actually emit. */
30103 gcc_unreachable ();
30104
30105 default:
30106 break;
30107 }
30108 }
30109 if (handled_one)
30110 return;
30111 pat = PATTERN (insn);
30112 found:
30113
30114 switch (GET_CODE (pat))
30115 {
30116 case SET:
30117 arm_unwind_emit_set (out_file, pat);
30118 break;
30119
30120 case SEQUENCE:
30121 /* Store multiple. */
30122 arm_unwind_emit_sequence (out_file, pat);
30123 break;
30124
30125 default:
30126 abort();
30127 }
30128 }
30129
30130
30131 /* Output a reference from a function exception table to the type_info
30132 object X. The EABI specifies that the symbol should be relocated by
30133 an R_ARM_TARGET2 relocation. */
30134
30135 static bool
30136 arm_output_ttype (rtx x)
30137 {
30138 fputs ("\t.word\t", asm_out_file);
30139 output_addr_const (asm_out_file, x);
30140 /* Use special relocations for symbol references. */
30141 if (!CONST_INT_P (x))
30142 fputs ("(TARGET2)", asm_out_file);
30143 fputc ('\n', asm_out_file);
30144
30145 return TRUE;
30146 }
30147
30148 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30149
30150 static void
30151 arm_asm_emit_except_personality (rtx personality)
30152 {
30153 fputs ("\t.personality\t", asm_out_file);
30154 output_addr_const (asm_out_file, personality);
30155 fputc ('\n', asm_out_file);
30156 }
30157 #endif /* ARM_UNWIND_INFO */
30158
30159 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30160
30161 static void
30162 arm_asm_init_sections (void)
30163 {
30164 #if ARM_UNWIND_INFO
30165 exception_section = get_unnamed_section (0, output_section_asm_op,
30166 "\t.handlerdata");
30167 #endif /* ARM_UNWIND_INFO */
30168
30169 #ifdef OBJECT_FORMAT_ELF
30170 if (target_pure_code)
30171 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30172 #endif
30173 }
30174
30175 /* Output unwind directives for the start/end of a function. */
30176
30177 void
30178 arm_output_fn_unwind (FILE * f, bool prologue)
30179 {
30180 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30181 return;
30182
30183 if (prologue)
30184 fputs ("\t.fnstart\n", f);
30185 else
30186 {
30187 /* If this function will never be unwound, then mark it as such.
30188 The same condition is used in arm_unwind_emit to suppress
30189 the frame annotations. */
30190 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30191 && (TREE_NOTHROW (current_function_decl)
30192 || crtl->all_throwers_are_sibcalls))
30193 fputs("\t.cantunwind\n", f);
30194
30195 fputs ("\t.fnend\n", f);
30196 }
30197 }
30198
30199 static bool
30200 arm_emit_tls_decoration (FILE *fp, rtx x)
30201 {
30202 enum tls_reloc reloc;
30203 rtx val;
30204
30205 val = XVECEXP (x, 0, 0);
30206 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30207
30208 output_addr_const (fp, val);
30209
30210 switch (reloc)
30211 {
30212 case TLS_GD32:
30213 fputs ("(tlsgd)", fp);
30214 break;
30215 case TLS_GD32_FDPIC:
30216 fputs ("(tlsgd_fdpic)", fp);
30217 break;
30218 case TLS_LDM32:
30219 fputs ("(tlsldm)", fp);
30220 break;
30221 case TLS_LDM32_FDPIC:
30222 fputs ("(tlsldm_fdpic)", fp);
30223 break;
30224 case TLS_LDO32:
30225 fputs ("(tlsldo)", fp);
30226 break;
30227 case TLS_IE32:
30228 fputs ("(gottpoff)", fp);
30229 break;
30230 case TLS_IE32_FDPIC:
30231 fputs ("(gottpoff_fdpic)", fp);
30232 break;
30233 case TLS_LE32:
30234 fputs ("(tpoff)", fp);
30235 break;
30236 case TLS_DESCSEQ:
30237 fputs ("(tlsdesc)", fp);
30238 break;
30239 default:
30240 gcc_unreachable ();
30241 }
30242
30243 switch (reloc)
30244 {
30245 case TLS_GD32:
30246 case TLS_LDM32:
30247 case TLS_IE32:
30248 case TLS_DESCSEQ:
30249 fputs (" + (. - ", fp);
30250 output_addr_const (fp, XVECEXP (x, 0, 2));
30251 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
30252 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30253 output_addr_const (fp, XVECEXP (x, 0, 3));
30254 fputc (')', fp);
30255 break;
30256 default:
30257 break;
30258 }
30259
30260 return TRUE;
30261 }
30262
30263 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30264
30265 static void
30266 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30267 {
30268 gcc_assert (size == 4);
30269 fputs ("\t.word\t", file);
30270 output_addr_const (file, x);
30271 fputs ("(tlsldo)", file);
30272 }
30273
30274 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30275
30276 static bool
30277 arm_output_addr_const_extra (FILE *fp, rtx x)
30278 {
30279 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30280 return arm_emit_tls_decoration (fp, x);
30281 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30282 {
30283 char label[256];
30284 int labelno = INTVAL (XVECEXP (x, 0, 0));
30285
30286 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30287 assemble_name_raw (fp, label);
30288
30289 return TRUE;
30290 }
30291 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30292 {
30293 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30294 if (GOT_PCREL)
30295 fputs ("+.", fp);
30296 fputs ("-(", fp);
30297 output_addr_const (fp, XVECEXP (x, 0, 0));
30298 fputc (')', fp);
30299 return TRUE;
30300 }
30301 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30302 {
30303 output_addr_const (fp, XVECEXP (x, 0, 0));
30304 if (GOT_PCREL)
30305 fputs ("+.", fp);
30306 fputs ("-(", fp);
30307 output_addr_const (fp, XVECEXP (x, 0, 1));
30308 fputc (')', fp);
30309 return TRUE;
30310 }
30311 else if (GET_CODE (x) == CONST_VECTOR)
30312 return arm_emit_vector_const (fp, x);
30313
30314 return FALSE;
30315 }
30316
30317 /* Output assembly for a shift instruction.
30318 SET_FLAGS determines how the instruction modifies the condition codes.
30319 0 - Do not set condition codes.
30320 1 - Set condition codes.
30321 2 - Use smallest instruction. */
30322 const char *
30323 arm_output_shift(rtx * operands, int set_flags)
30324 {
30325 char pattern[100];
30326 static const char flag_chars[3] = {'?', '.', '!'};
30327 const char *shift;
30328 HOST_WIDE_INT val;
30329 char c;
30330
30331 c = flag_chars[set_flags];
30332 shift = shift_op(operands[3], &val);
30333 if (shift)
30334 {
30335 if (val != -1)
30336 operands[2] = GEN_INT(val);
30337 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30338 }
30339 else
30340 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30341
30342 output_asm_insn (pattern, operands);
30343 return "";
30344 }
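/* For example (illustrative only): with operands describing a left shift by
   a constant, the template above typically assembles to something like
   "lsl r0, r1, #3", or "lsls r0, r1, #3" when the flag-setting variant is
   requested; when no shift operation applies, a plain "mov" is emitted.  */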
30345
30346 /* Output assembly for a WMMX immediate shift instruction. */
30347 const char *
30348 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30349 {
30350 int shift = INTVAL (operands[2]);
30351 char templ[50];
30352 machine_mode opmode = GET_MODE (operands[0]);
30353
30354 gcc_assert (shift >= 0);
30355
30356 /* Handle the case where the shift value exceeds the element width: > 63 (for
30357 the D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
30358 if (((opmode == V4HImode) && (shift > 15))
30359 || ((opmode == V2SImode) && (shift > 31))
30360 || ((opmode == DImode) && (shift > 63)))
30361 {
30362 if (wror_or_wsra)
30363 {
30364 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30365 output_asm_insn (templ, operands);
30366 if (opmode == DImode)
30367 {
30368 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30369 output_asm_insn (templ, operands);
30370 }
30371 }
30372 else
30373 {
30374 /* The destination register will contain all zeros. */
30375 sprintf (templ, "wzero\t%%0");
30376 output_asm_insn (templ, operands);
30377 }
30378 return "";
30379 }
30380
30381 if ((opmode == DImode) && (shift > 32))
30382 {
30383 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30384 output_asm_insn (templ, operands);
30385 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30386 output_asm_insn (templ, operands);
30387 }
30388 else
30389 {
30390 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30391 output_asm_insn (templ, operands);
30392 }
30393 return "";
30394 }
30395
30396 /* Output assembly for a WMMX tinsr instruction. */
30397 const char *
30398 arm_output_iwmmxt_tinsr (rtx *operands)
30399 {
30400 int mask = INTVAL (operands[3]);
30401 int i;
30402 char templ[50];
30403 int units = mode_nunits[GET_MODE (operands[0])];
30404 gcc_assert ((mask & (mask - 1)) == 0);
30405 for (i = 0; i < units; ++i)
30406 {
30407 if ((mask & 0x01) == 1)
30408 {
30409 break;
30410 }
30411 mask >>= 1;
30412 }
30413 gcc_assert (i < units);
30414 {
30415 switch (GET_MODE (operands[0]))
30416 {
30417 case E_V8QImode:
30418 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30419 break;
30420 case E_V4HImode:
30421 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30422 break;
30423 case E_V2SImode:
30424 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30425 break;
30426 default:
30427 gcc_unreachable ();
30428 break;
30429 }
30430 output_asm_insn (templ, operands);
30431 }
30432 return "";
30433 }
30434
30435 /* Output a Thumb-1 casesi dispatch sequence. */
30436 const char *
30437 thumb1_output_casesi (rtx *operands)
30438 {
30439 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30440
30441 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30442
30443 switch (GET_MODE(diff_vec))
30444 {
30445 case E_QImode:
30446 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30447 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30448 case E_HImode:
30449 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30450 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30451 case E_SImode:
30452 return "bl\t%___gnu_thumb1_case_si";
30453 default:
30454 gcc_unreachable ();
30455 }
30456 }
30457
30458 /* Output a Thumb-2 casesi instruction. */
30459 const char *
30460 thumb2_output_casesi (rtx *operands)
30461 {
30462 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30463
30464 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30465
30466 output_asm_insn ("cmp\t%0, %1", operands);
30467 output_asm_insn ("bhi\t%l3", operands);
30468 switch (GET_MODE(diff_vec))
30469 {
30470 case E_QImode:
30471 return "tbb\t[%|pc, %0]";
30472 case E_HImode:
30473 return "tbh\t[%|pc, %0, lsl #1]";
30474 case E_SImode:
30475 if (flag_pic)
30476 {
30477 output_asm_insn ("adr\t%4, %l2", operands);
30478 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30479 output_asm_insn ("add\t%4, %4, %5", operands);
30480 return "bx\t%4";
30481 }
30482 else
30483 {
30484 output_asm_insn ("adr\t%4, %l2", operands);
30485 return "ldr\t%|pc, [%4, %0, lsl #2]";
30486 }
30487 default:
30488 gcc_unreachable ();
30489 }
30490 }
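/* As a sketch of what the above emits for a byte-offset dispatch table
   (register numbers are illustrative):

	cmp	r0, #<max-index>
	bhi	.Ldefault
	tbb	[pc, r0]

   with "tbh [pc, r0, lsl #1]" for half-word tables, and an adr/ldr sequence
   for word-sized (SImode) tables.  */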
30491
30492 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30493 per-core tuning structs. */
30494 static int
30495 arm_issue_rate (void)
30496 {
30497 return current_tune->issue_rate;
30498 }
30499
30500 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30501 static int
30502 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30503 {
30504 if (DEBUG_INSN_P (insn))
30505 return more;
30506
30507 rtx_code code = GET_CODE (PATTERN (insn));
30508 if (code == USE || code == CLOBBER)
30509 return more;
30510
30511 if (get_attr_type (insn) == TYPE_NO_INSN)
30512 return more;
30513
30514 return more - 1;
30515 }
30516
30517 /* Return how many instructions the scheduler should look ahead to choose the
30518 best one. */
30519 static int
30520 arm_first_cycle_multipass_dfa_lookahead (void)
30521 {
30522 int issue_rate = arm_issue_rate ();
30523
30524 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30525 }
30526
30527 /* Enable modeling of L2 auto-prefetcher. */
30528 static int
30529 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30530 {
30531 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30532 }
30533
30534 const char *
30535 arm_mangle_type (const_tree type)
30536 {
30537 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30538 has to be mangled as if it is in the "std" namespace. */
30539 if (TARGET_AAPCS_BASED
30540 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30541 return "St9__va_list";
30542
30543 /* Half-precision floating point types. */
30544 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30545 {
30546 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30547 return NULL;
30548 if (TYPE_MODE (type) == BFmode)
30549 return "u6__bf16";
30550 else
30551 return "Dh";
30552 }
30553
30554 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30555 builtin type. */
30556 if (TYPE_NAME (type) != NULL)
30557 return arm_mangle_builtin_type (type);
30558
30559 /* Use the default mangling. */
30560 return NULL;
30561 }
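/* Examples of the mangling above: an AAPCS "va_list" argument mangles as
   "St9__va_list", "__bf16" as "u6__bf16" and "__fp16" as "Dh", while
   _Float16 (float16_type_node) falls through to the default mangling.  */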
30562
30563 /* Order of allocation of core registers for Thumb: this allocation is
30564 written over the corresponding initial entries of the array
30565 initialized with REG_ALLOC_ORDER. We allocate all low registers
30566 first. Saving and restoring a low register is usually cheaper than
30567 using a call-clobbered high register. */
30568
30569 static const int thumb_core_reg_alloc_order[] =
30570 {
30571 3, 2, 1, 0, 4, 5, 6, 7,
30572 12, 14, 8, 9, 10, 11
30573 };
30574
30575 /* Adjust register allocation order when compiling for Thumb. */
30576
30577 void
30578 arm_order_regs_for_local_alloc (void)
30579 {
30580 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30581 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30582 if (TARGET_THUMB)
30583 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30584 sizeof (thumb_core_reg_alloc_order));
30585 }
30586
30587 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30588
30589 bool
30590 arm_frame_pointer_required (void)
30591 {
30592 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30593 return true;
30594
30595 /* If the function receives nonlocal gotos, it needs to save the frame
30596 pointer in the nonlocal_goto_save_area object. */
30597 if (cfun->has_nonlocal_label)
30598 return true;
30599
30600 /* The frame pointer is required for non-leaf APCS frames. */
30601 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30602 return true;
30603
30604 /* If we are probing the stack in the prologue, we will have a faulting
30605 instruction prior to the stack adjustment and this requires a frame
30606 pointer if we want to catch the exception using the EABI unwinder. */
30607 if (!IS_INTERRUPT (arm_current_func_type ())
30608 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30609 || flag_stack_clash_protection)
30610 && arm_except_unwind_info (&global_options) == UI_TARGET
30611 && cfun->can_throw_non_call_exceptions)
30612 {
30613 HOST_WIDE_INT size = get_frame_size ();
30614
30615 /* That's irrelevant if there is no stack adjustment. */
30616 if (size <= 0)
30617 return false;
30618
30619 /* That's relevant only if there is a stack probe. */
30620 if (crtl->is_leaf && !cfun->calls_alloca)
30621 {
30622 /* We don't have the final size of the frame so adjust. */
30623 size += 32 * UNITS_PER_WORD;
30624 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30625 return true;
30626 }
30627 else
30628 return true;
30629 }
30630
30631 return false;
30632 }
30633
30634 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30635 All modes except THUMB1 have conditional execution.
30636 If we have conditional arithmetic, return false before reload to
30637 enable some ifcvt transformations. */
30638 static bool
30639 arm_have_conditional_execution (void)
30640 {
30641 bool has_cond_exec, enable_ifcvt_trans;
30642
30643 /* Only THUMB1 cannot support conditional execution. */
30644 has_cond_exec = !TARGET_THUMB1;
30645
30646 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30647 before reload. */
30648 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30649
30650 return has_cond_exec && !enable_ifcvt_trans;
30651 }
30652
30653 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30654 static HOST_WIDE_INT
30655 arm_vector_alignment (const_tree type)
30656 {
30657 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30658
30659 if (TARGET_AAPCS_BASED)
30660 align = MIN (align, 64);
30661
30662 return align;
30663 }
30664
30665 static unsigned int
30666 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30667 {
30668 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30669 {
30670 modes->safe_push (V16QImode);
30671 modes->safe_push (V8QImode);
30672 }
30673 return 0;
30674 }
30675
30676 static bool
30677 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30678 {
30679 /* Vectors which aren't in packed structures will not be less aligned than
30680 the natural alignment of their element type, so this is safe. */
30681 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30682 return !is_packed;
30683
30684 return default_builtin_vector_alignment_reachable (type, is_packed);
30685 }
30686
30687 static bool
30688 arm_builtin_support_vector_misalignment (machine_mode mode,
30689 const_tree type, int misalignment,
30690 bool is_packed)
30691 {
30692 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30693 {
30694 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30695
30696 if (is_packed)
30697 return align == 1;
30698
30699 /* If the misalignment is unknown, we should be able to handle the access
30700 so long as it is not to a member of a packed data structure. */
30701 if (misalignment == -1)
30702 return true;
30703
30704 /* Return true if the misalignment is a multiple of the natural alignment
30705 of the vector's element type. This is probably always going to be
30706 true in practice, since we've already established that this isn't a
30707 packed access. */
30708 return ((misalignment % align) == 0);
30709 }
30710
30711 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30712 is_packed);
30713 }
30714
30715 static void
30716 arm_conditional_register_usage (void)
30717 {
30718 int regno;
30719
30720 if (TARGET_THUMB1 && optimize_size)
30721 {
30722 /* When optimizing for size on Thumb-1, it's better not
30723 to use the HI regs, because of the overhead of
30724 stacking them. */
30725 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30726 fixed_regs[regno] = call_used_regs[regno] = 1;
30727 }
30728
30729 /* The link register can be clobbered by any branch insn,
30730 but we have no way to track that at present, so mark
30731 it as unavailable. */
30732 if (TARGET_THUMB1)
30733 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30734
30735 if (TARGET_32BIT && TARGET_VFP_BASE)
30736 {
30737 /* VFPv3 registers are disabled when earlier VFP
30738 versions are selected due to the definition of
30739 LAST_VFP_REGNUM. */
30740 for (regno = FIRST_VFP_REGNUM;
30741 regno <= LAST_VFP_REGNUM; ++ regno)
30742 {
30743 fixed_regs[regno] = 0;
30744 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30745 || regno >= FIRST_VFP_REGNUM + 32;
30746 }
30747 if (TARGET_HAVE_MVE)
30748 fixed_regs[VPR_REGNUM] = 0;
30749 }
30750
30751 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30752 {
30753 regno = FIRST_IWMMXT_GR_REGNUM;
30754 /* The 2002/10/09 revision of the XScale ABI has wCG0
30755 and wCG1 as call-preserved registers. The 2002/11/21
30756 revision changed this so that all wCG registers are
30757 scratch registers. */
30758 for (regno = FIRST_IWMMXT_GR_REGNUM;
30759 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30760 fixed_regs[regno] = 0;
30761 /* The XScale ABI has wR0 - wR9 as scratch registers,
30762 the rest as call-preserved registers. */
30763 for (regno = FIRST_IWMMXT_REGNUM;
30764 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30765 {
30766 fixed_regs[regno] = 0;
30767 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30768 }
30769 }
30770
30771 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30772 {
30773 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30774 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30775 }
30776 else if (TARGET_APCS_STACK)
30777 {
30778 fixed_regs[10] = 1;
30779 call_used_regs[10] = 1;
30780 }
30781 /* -mcaller-super-interworking reserves r11 for calls to
30782 _interwork_r11_call_via_rN(). Making the register global
30783 is an easy way of ensuring that it remains valid for all
30784 calls. */
30785 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30786 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30787 {
30788 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30789 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30790 if (TARGET_CALLER_INTERWORKING)
30791 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30792 }
30793
30794 /* The Q and GE bits are only accessed via special ACLE patterns. */
30795 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30796 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30797
30798 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30799 }
30800
30801 static reg_class_t
30802 arm_preferred_rename_class (reg_class_t rclass)
30803 {
30804 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30805 using GENERAL_REGS.  During the register rename pass we therefore prefer
30806 LO_REGS, which can reduce code size. */
30807 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30808 return LO_REGS;
30809 else
30810 return NO_REGS;
30811 }
30812
30813 /* Compute the attribute "length" of insn "*push_multi".
30814 So this function MUST be kept in sync with that insn pattern. */
30815 int
30816 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30817 {
30818 int i, regno, hi_reg;
30819 int num_saves = XVECLEN (parallel_op, 0);
30820
30821 /* ARM mode. */
30822 if (TARGET_ARM)
30823 return 4;
30824 /* Thumb1 mode. */
30825 if (TARGET_THUMB1)
30826 return 2;
30827
30828 /* Thumb2 mode. */
30829 regno = REGNO (first_op);
30830 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
30831 list fits in 8 bits.  Normally this means all registers in the list must be
30832 LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we must use 32-bit
30833 encodings, with one exception: for PUSH, LR (a HI_REG) can still be used
30834 with the 16-bit encoding. */
30835 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30836 for (i = 1; i < num_saves && !hi_reg; i++)
30837 {
30838 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30839 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30840 }
30841
30842 if (!hi_reg)
30843 return 2;
30844 return 4;
30845 }
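/* Illustrative Thumb-2 examples of the length computation above:
   "push {r0-r7, lr}" uses only LO_REGS plus LR, so it gets the 16-bit
   encoding (length 2), whereas "push {r4, r8}" includes a high register
   other than LR and therefore needs the 32-bit encoding (length 4).  */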
30846
30847 /* Compute the attribute "length" of insn. Currently, this function is used
30848 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30849 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30850 rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
30851 true if OPERANDS contains an insn which explicitly updates the base register. */
30852
30853 int
30854 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30855 {
30856 /* ARM mode. */
30857 if (TARGET_ARM)
30858 return 4;
30859 /* Thumb1 mode. */
30860 if (TARGET_THUMB1)
30861 return 2;
30862
30863 rtx parallel_op = operands[0];
30864 /* Start from the index of the last element of the PARALLEL. */
30865 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30866 /* Initialize REGNO to the number of the base register. */
30867 unsigned regno = REGNO (operands[1]);
30868 /* Skip return and write back pattern.
30869 We only need register pop pattern for later analysis. */
30870 unsigned first_indx = 0;
30871 first_indx += return_pc ? 1 : 0;
30872 first_indx += write_back_p ? 1 : 0;
30873
30874 /* A pop operation can be done through LDM or POP. If the base register is SP
30875 and write back is used, then LDM is an alias of POP. */
30876 bool pop_p = (regno == SP_REGNUM && write_back_p);
30877 bool ldm_p = !pop_p;
30878
30879 /* Check base register for LDM. */
30880 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30881 return 4;
30882
30883 /* Check each register in the list. */
30884 for (; indx >= first_indx; indx--)
30885 {
30886 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30887 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30888 comment in arm_attr_length_push_multi. */
30889 if (REGNO_REG_CLASS (regno) == HI_REGS
30890 && (regno != PC_REGNUM || ldm_p))
30891 return 4;
30892 }
30893
30894 return 2;
30895 }
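/* Illustrative Thumb-2 examples of the above: "pop {r4-r7, pc}" (SP base
   with write back, LO_REGS plus PC) gets the 16-bit encoding (length 2),
   while an LDM from a high base register such as "ldmia r8!, {r0-r3}"
   needs the 32-bit encoding (length 4).  */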
30896
30897 /* Compute the number of instructions emitted by output_move_double. */
30898 int
30899 arm_count_output_move_double_insns (rtx *operands)
30900 {
30901 int count;
30902 rtx ops[2];
30903 /* output_move_double may modify the operands array, so call it
30904 here on a copy of the array. */
30905 ops[0] = operands[0];
30906 ops[1] = operands[1];
30907 output_move_double (ops, false, &count);
30908 return count;
30909 }
30910
30911 /* Same as above, but operands are a register/memory pair in SImode.
30912 Assumes operands has the base register in position 0 and memory in position
30913 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30914 int
30915 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30916 {
30917 int count;
30918 rtx ops[2];
30919 int regnum, memnum;
30920 if (load)
30921 regnum = 0, memnum = 1;
30922 else
30923 regnum = 1, memnum = 0;
30924 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30925 ops[memnum] = adjust_address (operands[2], DImode, 0);
30926 output_move_double (ops, false, &count);
30927 return count;
30928 }
30929
30930
30931 int
30932 vfp3_const_double_for_fract_bits (rtx operand)
30933 {
30934 REAL_VALUE_TYPE r0;
30935
30936 if (!CONST_DOUBLE_P (operand))
30937 return 0;
30938
30939 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30940 if (exact_real_inverse (DFmode, &r0)
30941 && !REAL_VALUE_NEGATIVE (r0))
30942 {
30943 if (exact_real_truncate (DFmode, &r0))
30944 {
30945 HOST_WIDE_INT value = real_to_integer (&r0);
30946 value = value & 0xffffffff;
30947 if ((value != 0) && ( (value & (value - 1)) == 0))
30948 {
30949 int ret = exact_log2 (value);
30950 gcc_assert (IN_RANGE (ret, 0, 31));
30951 return ret;
30952 }
30953 }
30954 }
30955 return 0;
30956 }
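/* For example: a CONST_DOUBLE of 0.03125 (1/32) has the exact inverse 32.0,
   a power of two, so the function returns 5; a value such as 0.3 has no
   exact power-of-two inverse and yields 0.  */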
30957
30958 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30959 log2 is in [1, 32], return that log2. Otherwise return -1.
30960 This is used in the patterns for vcvt.s32.f32 floating-point to
30961 fixed-point conversions. */
30962
30963 int
30964 vfp3_const_double_for_bits (rtx x)
30965 {
30966 const REAL_VALUE_TYPE *r;
30967
30968 if (!CONST_DOUBLE_P (x))
30969 return -1;
30970
30971 r = CONST_DOUBLE_REAL_VALUE (x);
30972
30973 if (REAL_VALUE_NEGATIVE (*r)
30974 || REAL_VALUE_ISNAN (*r)
30975 || REAL_VALUE_ISINF (*r)
30976 || !real_isinteger (r, SFmode))
30977 return -1;
30978
30979 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30980
30981 /* The exact_log2 above will have returned -1 if this is
30982 not an exact log2. */
30983 if (!IN_RANGE (hwint, 1, 32))
30984 return -1;
30985
30986 return hwint;
30987 }
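/* For example: a CONST_DOUBLE of 65536.0 returns 16, while 1.0 (whose log2
   is 0, outside [1, 32]) and 3.0 (not a power of two) both return -1.  */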
30988
30989 \f
30990 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30991
30992 static void
30993 arm_pre_atomic_barrier (enum memmodel model)
30994 {
30995 if (need_atomic_barrier_p (model, true))
30996 emit_insn (gen_memory_barrier ());
30997 }
30998
30999 static void
31000 arm_post_atomic_barrier (enum memmodel model)
31001 {
31002 if (need_atomic_barrier_p (model, false))
31003 emit_insn (gen_memory_barrier ());
31004 }
31005
31006 /* Emit the load-exclusive and store-exclusive instructions.
31007 Use acquire and release versions if necessary. */
31008
31009 static void
31010 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31011 {
31012 rtx (*gen) (rtx, rtx);
31013
31014 if (acq)
31015 {
31016 switch (mode)
31017 {
31018 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31019 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31020 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31021 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31022 default:
31023 gcc_unreachable ();
31024 }
31025 }
31026 else
31027 {
31028 switch (mode)
31029 {
31030 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31031 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31032 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31033 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31034 default:
31035 gcc_unreachable ();
31036 }
31037 }
31038
31039 emit_insn (gen (rval, mem));
31040 }
31041
31042 static void
31043 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31044 rtx mem, bool rel)
31045 {
31046 rtx (*gen) (rtx, rtx, rtx);
31047
31048 if (rel)
31049 {
31050 switch (mode)
31051 {
31052 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31053 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31054 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31055 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31056 default:
31057 gcc_unreachable ();
31058 }
31059 }
31060 else
31061 {
31062 switch (mode)
31063 {
31064 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31065 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31066 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31067 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31068 default:
31069 gcc_unreachable ();
31070 }
31071 }
31072
31073 emit_insn (gen (bval, rval, mem));
31074 }
31075
31076 /* Mark the previous jump instruction as unlikely. */
31077
31078 static void
31079 emit_unlikely_jump (rtx insn)
31080 {
31081 rtx_insn *jump = emit_jump_insn (insn);
31082 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31083 }
31084
31085 /* Expand a compare and swap pattern. */
31086
31087 void
31088 arm_expand_compare_and_swap (rtx operands[])
31089 {
31090 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31091 machine_mode mode, cmp_mode;
31092
31093 bval = operands[0];
31094 rval = operands[1];
31095 mem = operands[2];
31096 oldval = operands[3];
31097 newval = operands[4];
31098 is_weak = operands[5];
31099 mod_s = operands[6];
31100 mod_f = operands[7];
31101 mode = GET_MODE (mem);
31102
31103 /* Normally the succ memory model must be stronger than fail, but in the
31104 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31105 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31106
31107 if (TARGET_HAVE_LDACQ
31108 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31109 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31110 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31111
31112 switch (mode)
31113 {
31114 case E_QImode:
31115 case E_HImode:
31116 /* For narrow modes, we're going to perform the comparison in SImode,
31117 so do the zero-extension now. */
31118 rval = gen_reg_rtx (SImode);
31119 oldval = convert_modes (SImode, mode, oldval, true);
31120 /* FALLTHRU */
31121
31122 case E_SImode:
31123 /* Force the value into a register if needed. We waited until after
31124 the zero-extension above to do this properly. */
31125 if (!arm_add_operand (oldval, SImode))
31126 oldval = force_reg (SImode, oldval);
31127 break;
31128
31129 case E_DImode:
31130 if (!cmpdi_operand (oldval, mode))
31131 oldval = force_reg (mode, oldval);
31132 break;
31133
31134 default:
31135 gcc_unreachable ();
31136 }
31137
31138 if (TARGET_THUMB1)
31139 cmp_mode = E_SImode;
31140 else
31141 cmp_mode = CC_Zmode;
31142
31143 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31144 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31145 oldval, newval, is_weak, mod_s, mod_f));
31146
31147 if (mode == QImode || mode == HImode)
31148 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31149
31150 /* In all cases, we arrange for success to be signaled by Z set.
31151 This arrangement allows for the boolean result to be used directly
31152 in a subsequent branch, post optimization. For Thumb-1 targets, the
31153 boolean negation of the result is also stored in bval because Thumb-1
31154 backend lacks dependency tracking for CC flag due to flag-setting not
31155 being represented at RTL level. */
31156 if (TARGET_THUMB1)
31157 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31158 else
31159 {
31160 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31161 emit_insn (gen_rtx_SET (bval, x));
31162 }
31163 }
31164
31165 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31166 another memory store between the load-exclusive and store-exclusive can
31167 reset the monitor from Exclusive to Open state. This means we must wait
31168 until after reload to split the pattern, lest we get a register spill in
31169 the middle of the atomic sequence. Success of the compare and swap is
31170 indicated by the Z flag set for 32bit targets and by neg_bval being zero
31171 for Thumb-1 targets (ie. negation of the boolean value returned by
31172 atomic_compare_and_swapmode standard pattern in operand 0). */
31173
31174 void
31175 arm_split_compare_and_swap (rtx operands[])
31176 {
31177 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31178 machine_mode mode;
31179 enum memmodel mod_s, mod_f;
31180 bool is_weak;
31181 rtx_code_label *label1, *label2;
31182 rtx x, cond;
31183
31184 rval = operands[1];
31185 mem = operands[2];
31186 oldval = operands[3];
31187 newval = operands[4];
31188 is_weak = (operands[5] != const0_rtx);
31189 mod_s_rtx = operands[6];
31190 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31191 mod_f = memmodel_from_int (INTVAL (operands[7]));
31192 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31193 mode = GET_MODE (mem);
31194
31195 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31196
31197 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31198 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31199
31200 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31201 a full barrier is emitted after the store-release. */
31202 if (is_armv8_sync)
31203 use_acquire = false;
31204
31205 /* Checks whether a barrier is needed and emits one accordingly. */
31206 if (!(use_acquire || use_release))
31207 arm_pre_atomic_barrier (mod_s);
31208
31209 label1 = NULL;
31210 if (!is_weak)
31211 {
31212 label1 = gen_label_rtx ();
31213 emit_label (label1);
31214 }
31215 label2 = gen_label_rtx ();
31216
31217 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31218
31219 /* Z is set to 0 for 32bit targets (resp. neg_bval set nonzero for Thumb-1) if oldval != rval,
31220 as required to communicate with arm_expand_compare_and_swap. */
31221 if (TARGET_32BIT)
31222 {
31223 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31224 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31225 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31226 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31227 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31228 }
31229 else
31230 {
31231 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31232 if (thumb1_cmpneg_operand (oldval, SImode))
31233 {
31234 rtx src = rval;
31235 if (!satisfies_constraint_L (oldval))
31236 {
31237 gcc_assert (satisfies_constraint_J (oldval));
31238
31239 /* For such immediates, ADDS needs the source and destination regs
31240 to be the same.
31241
31242 Normally this would be handled by RA, but this is all happening
31243 after RA. */
31244 emit_move_insn (neg_bval, rval);
31245 src = neg_bval;
31246 }
31247
31248 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31249 label2, cond));
31250 }
31251 else
31252 {
31253 emit_move_insn (neg_bval, const1_rtx);
31254 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31255 }
31256 }
31257
31258 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31259
31260 /* Weak or strong, we want EQ to be true for success, so that we
31261 match the flags that we got from the compare above. */
31262 if (TARGET_32BIT)
31263 {
31264 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31265 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31266 emit_insn (gen_rtx_SET (cond, x));
31267 }
31268
31269 if (!is_weak)
31270 {
31271 /* Z is set to boolean value of !neg_bval, as required to communicate
31272 with arm_expand_compare_and_swap. */
31273 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31274 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31275 }
31276
31277 if (!is_mm_relaxed (mod_f))
31278 emit_label (label2);
31279
31280 /* Checks whether a barrier is needed and emits one accordingly. */
31281 if (is_armv8_sync
31282 || !(use_acquire || use_release))
31283 arm_post_atomic_barrier (mod_s);
31284
31285 if (is_mm_relaxed (mod_f))
31286 emit_label (label2);
31287 }
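/* As a rough sketch (register names are illustrative; barriers and the
   acquire/release variants are omitted), the strong SImode case above
   expands to a loop of the form:

	.Lretry:
		ldrex	r_val, [r_mem]
		cmp	r_val, r_old
		bne	.Ldone
		strex	r_fail, r_new, [r_mem]
		cmp	r_fail, #0
		bne	.Lretry
	.Ldone:
 */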
31288
31289 /* Split an atomic operation pattern. Operation is given by CODE and is one
31290 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31291 operation). Operation is performed on the content at MEM and on VALUE
31292 following the memory model MODEL_RTX. The content at MEM before and after
31293 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31294 success of the operation is returned in COND. Using a scratch register or
31295 an operand register for these determines what result is returned for that
31296 pattern. */
31297
31298 void
31299 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31300 rtx value, rtx model_rtx, rtx cond)
31301 {
31302 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31303 machine_mode mode = GET_MODE (mem);
31304 machine_mode wmode = (mode == DImode ? DImode : SImode);
31305 rtx_code_label *label;
31306 bool all_low_regs, bind_old_new;
31307 rtx x;
31308
31309 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31310
31311 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31312 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31313
31314 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31315 a full barrier is emitted after the store-release. */
31316 if (is_armv8_sync)
31317 use_acquire = false;
31318
31319 /* Checks whether a barrier is needed and emits one accordingly. */
31320 if (!(use_acquire || use_release))
31321 arm_pre_atomic_barrier (model);
31322
31323 label = gen_label_rtx ();
31324 emit_label (label);
31325
31326 if (new_out)
31327 new_out = gen_lowpart (wmode, new_out);
31328 if (old_out)
31329 old_out = gen_lowpart (wmode, old_out);
31330 else
31331 old_out = new_out;
31332 value = simplify_gen_subreg (wmode, value, mode, 0);
31333
31334 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31335
31336 /* Does the operation require destination and first operand to use the same
31337 register? This is decided by register constraints of relevant insn
31338 patterns in thumb1.md. */
31339 gcc_assert (!new_out || REG_P (new_out));
31340 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31341 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31342 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31343 bind_old_new =
31344 (TARGET_THUMB1
31345 && code != SET
31346 && code != MINUS
31347 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31348
31349 /* We want to return the old value while putting the result of the operation
31350 in the same register as the old value so copy the old value over to the
31351 destination register and use that register for the operation. */
31352 if (old_out && bind_old_new)
31353 {
31354 emit_move_insn (new_out, old_out);
31355 old_out = new_out;
31356 }
31357
31358 switch (code)
31359 {
31360 case SET:
31361 new_out = value;
31362 break;
31363
31364 case NOT:
31365 x = gen_rtx_AND (wmode, old_out, value);
31366 emit_insn (gen_rtx_SET (new_out, x));
31367 x = gen_rtx_NOT (wmode, new_out);
31368 emit_insn (gen_rtx_SET (new_out, x));
31369 break;
31370
31371 case MINUS:
31372 if (CONST_INT_P (value))
31373 {
31374 value = gen_int_mode (-INTVAL (value), wmode);
31375 code = PLUS;
31376 }
31377 /* FALLTHRU */
31378
31379 case PLUS:
31380 if (mode == DImode)
31381 {
31382 /* DImode plus/minus need to clobber flags. */
31383 /* The adddi3 and subdi3 patterns are incorrectly written so that
31384 they require matching operands, even when we could easily support
31385 three operands. Thankfully, this can be fixed up post-splitting,
31386 as the individual add+adc patterns do accept three operands and
31387 post-reload cprop can make these moves go away. */
31388 emit_move_insn (new_out, old_out);
31389 if (code == PLUS)
31390 x = gen_adddi3 (new_out, new_out, value);
31391 else
31392 x = gen_subdi3 (new_out, new_out, value);
31393 emit_insn (x);
31394 break;
31395 }
31396 /* FALLTHRU */
31397
31398 default:
31399 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31400 emit_insn (gen_rtx_SET (new_out, x));
31401 break;
31402 }
31403
31404 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31405 use_release);
31406
31407 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31408 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31409
31410 /* Checks whether a barrier is needed and emits one accordingly. */
31411 if (is_armv8_sync
31412 || !(use_acquire || use_release))
31413 arm_post_atomic_barrier (model);
31414 }
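/* As a rough sketch (illustrative register names, barriers omitted), an
   atomic fetch-and-add split by the function above takes the form:

	.Lretry:
		ldrex	r_old, [r_mem]
		add	r_new, r_old, r_value
		strex	r_cond, r_new, [r_mem]
		cmp	r_cond, #0
		bne	.Lretry
 */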
31415 \f
31416 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31417 opt_machine_mode
31418 arm_mode_to_pred_mode (machine_mode mode)
31419 {
31420 switch (GET_MODE_NUNITS (mode))
31421 {
31422 case 16: return V16BImode;
31423 case 8: return V8BImode;
31424 case 4: return V4BImode;
31425 case 2: return V2QImode;
31426 }
31427 return opt_machine_mode ();
31428 }
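/* For example, the mapping above gives V16BImode for V16QImode, V8BImode for
   V8HImode, V4BImode for V4SImode and V2QImode for V2DImode; any other
   element count yields no predicate mode.  */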
31429
31430 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31431 If CAN_INVERT, store either the result or its inverse in TARGET
31432 and return true if TARGET contains the inverse. If !CAN_INVERT,
31433 always store the result in TARGET, never its inverse.
31434
31435 Note that the handling of floating-point comparisons is not
31436 IEEE compliant. */
31437
31438 bool
31439 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31440 bool can_invert)
31441 {
31442 machine_mode cmp_result_mode = GET_MODE (target);
31443 machine_mode cmp_mode = GET_MODE (op0);
31444
31445 bool inverted;
31446
31447 /* MVE supports more comparisons than Neon. */
31448 if (TARGET_HAVE_MVE)
31449 inverted = false;
31450 else
31451 switch (code)
31452 {
31453 /* For these we need to compute the inverse of the requested
31454 comparison. */
31455 case UNORDERED:
31456 case UNLT:
31457 case UNLE:
31458 case UNGT:
31459 case UNGE:
31460 case UNEQ:
31461 case NE:
31462 code = reverse_condition_maybe_unordered (code);
31463 if (!can_invert)
31464 {
31465 /* Recursively emit the inverted comparison into a temporary
31466 and then store its inverse in TARGET. This avoids reusing
31467 TARGET (which for integer NE could be one of the inputs). */
31468 rtx tmp = gen_reg_rtx (cmp_result_mode);
31469 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31470 gcc_unreachable ();
31471 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31472 return false;
31473 }
31474 inverted = true;
31475 break;
31476
31477 default:
31478 inverted = false;
31479 break;
31480 }
31481
31482 switch (code)
31483 {
31484 /* These are natively supported by Neon for zero comparisons, but otherwise
31485 require the operands to be swapped. For MVE, we can only compare
31486 registers. */
31487 case LE:
31488 case LT:
31489 if (!TARGET_HAVE_MVE)
31490 if (op1 != CONST0_RTX (cmp_mode))
31491 {
31492 code = swap_condition (code);
31493 std::swap (op0, op1);
31494 }
31495 /* Fall through. */
31496
31497 /* These are natively supported by Neon for both register and zero
31498 operands. MVE supports registers only. */
31499 case EQ:
31500 case GE:
31501 case GT:
31502 case NE:
31503 if (TARGET_HAVE_MVE)
31504 {
31505 switch (GET_MODE_CLASS (cmp_mode))
31506 {
31507 case MODE_VECTOR_INT:
31508 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31509 op0, force_reg (cmp_mode, op1)));
31510 break;
31511 case MODE_VECTOR_FLOAT:
31512 if (TARGET_HAVE_MVE_FLOAT)
31513 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31514 op0, force_reg (cmp_mode, op1)));
31515 else
31516 gcc_unreachable ();
31517 break;
31518 default:
31519 gcc_unreachable ();
31520 }
31521 }
31522 else
31523 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31524 return inverted;
31525
31526 /* These are natively supported for register operands only.
31527 Comparisons with zero aren't useful and should be folded
31528 or canonicalized by target-independent code. */
31529 case GEU:
31530 case GTU:
31531 if (TARGET_HAVE_MVE)
31532 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31533 op0, force_reg (cmp_mode, op1)));
31534 else
31535 emit_insn (gen_neon_vc (code, cmp_mode, target,
31536 op0, force_reg (cmp_mode, op1)));
31537 return inverted;
31538
31539 /* These require the operands to be swapped and likewise do not
31540 support comparisons with zero. */
31541 case LEU:
31542 case LTU:
31543 if (TARGET_HAVE_MVE)
31544 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31545 force_reg (cmp_mode, op1), op0));
31546 else
31547 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31548 target, force_reg (cmp_mode, op1), op0));
31549 return inverted;
31550
31551 /* These need a combination of two comparisons. */
31552 case LTGT:
31553 case ORDERED:
31554 {
31555 /* Operands are LTGT iff (a > b || a < b).
31556 Operands are ORDERED iff (a > b || a <= b). */
31557 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31558 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31559 rtx_code alt_code = (code == LTGT ? LT : LE);
31560 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31561 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31562 gcc_unreachable ();
31563 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31564 gt_res, alt_res)));
31565 return inverted;
31566 }
31567
31568 default:
31569 gcc_unreachable ();
31570 }
31571 }
31572
31573 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31574 CMP_RESULT_MODE is the mode of the comparison result. */
31575
31576 void
31577 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31578 {
31579 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31580 arm_expand_vector_compare, and another one here. */
31581 rtx mask;
31582
31583 if (TARGET_HAVE_MVE)
31584 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31585 else
31586 mask = gen_reg_rtx (cmp_result_mode);
31587
31588 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31589 operands[4], operands[5], true);
31590 if (inverted)
31591 std::swap (operands[1], operands[2]);
31592 if (TARGET_NEON)
31593 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31594 mask, operands[1], operands[2]));
31595 else
31596 {
31597 machine_mode cmp_mode = GET_MODE (operands[0]);
31598
31599 switch (GET_MODE_CLASS (cmp_mode))
31600 {
31601 case MODE_VECTOR_INT:
31602 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31603 operands[1], operands[2], mask));
31604 break;
31605 case MODE_VECTOR_FLOAT:
31606 if (TARGET_HAVE_MVE_FLOAT)
31607 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31608 operands[1], operands[2], mask));
31609 else
31610 gcc_unreachable ();
31611 break;
31612 default:
31613 gcc_unreachable ();
31614 }
31615 }
31616 }
31617 \f
31618 #define MAX_VECT_LEN 16
31619
31620 struct expand_vec_perm_d
31621 {
31622 rtx target, op0, op1;
31623 vec_perm_indices perm;
31624 machine_mode vmode;
31625 bool one_vector_p;
31626 bool testing_p;
31627 };
31628
31629 /* Generate a variable permutation. */
31630
31631 static void
31632 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31633 {
31634 machine_mode vmode = GET_MODE (target);
31635 bool one_vector_p = rtx_equal_p (op0, op1);
31636
31637 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31638 gcc_checking_assert (GET_MODE (op0) == vmode);
31639 gcc_checking_assert (GET_MODE (op1) == vmode);
31640 gcc_checking_assert (GET_MODE (sel) == vmode);
31641 gcc_checking_assert (TARGET_NEON);
31642
31643 if (one_vector_p)
31644 {
31645 if (vmode == V8QImode)
31646 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31647 else
31648 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31649 }
31650 else
31651 {
31652 rtx pair;
31653
31654 if (vmode == V8QImode)
31655 {
31656 pair = gen_reg_rtx (V16QImode);
31657 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31658 pair = gen_lowpart (TImode, pair);
31659 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31660 }
31661 else
31662 {
31663 pair = gen_reg_rtx (OImode);
31664 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31665 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31666 }
31667 }
31668 }
31669
31670 void
31671 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31672 {
31673 machine_mode vmode = GET_MODE (target);
31674 unsigned int nelt = GET_MODE_NUNITS (vmode);
31675 bool one_vector_p = rtx_equal_p (op0, op1);
31676 rtx mask;
31677
31678 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31679 numbering of elements for big-endian, we must reverse the order. */
31680 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31681
31682 /* The VTBL instruction does not use a modulo index, so we must take care
31683 of that ourselves. */
31684 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31685 mask = gen_const_vec_duplicate (vmode, mask);
31686 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31687
31688 arm_expand_vec_perm_1 (target, op0, op1, sel);
31689 }
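/* For instance, with a single V8QImode input the selector bytes are masked
   with 7 before the VTBL, and with two V16QImode inputs they are masked
   with 31, so out-of-range indices wrap as VEC_PERM_EXPR requires.  */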
31690
31691 /* Map between the architectural lane order and GCC's lane order, taking the
31692 ABI into account.  See comment above output_move_neon for details. */
31693
31694 static int
31695 neon_endian_lane_map (machine_mode mode, int lane)
31696 {
31697 if (BYTES_BIG_ENDIAN)
31698 {
31699 int nelems = GET_MODE_NUNITS (mode);
31700 /* Reverse lane order. */
31701 lane = (nelems - 1 - lane);
31702 /* Reverse D register order, to match ABI. */
31703 if (GET_MODE_SIZE (mode) == 16)
31704 lane = lane ^ (nelems / 2);
31705 }
31706 return lane;
31707 }
31708
31709 /* Some permutations index into pairs of vectors, this is a helper function
31710 to map indexes into those pairs of vectors. */
31711
31712 static int
31713 neon_pair_endian_lane_map (machine_mode mode, int lane)
31714 {
31715 int nelem = GET_MODE_NUNITS (mode);
31716 if (BYTES_BIG_ENDIAN)
31717 lane =
31718 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31719 return lane;
31720 }
31721
31722 /* Generate or test for an insn that supports a constant permutation. */
31723
31724 /* Recognize patterns for the VUZP insns. */
31725
31726 static bool
31727 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31728 {
31729 unsigned int i, odd, mask, nelt = d->perm.length ();
31730 rtx out0, out1, in0, in1;
31731 int first_elem;
31732 int swap_nelt;
31733
31734 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31735 return false;
31736
31737 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31738 big endian pattern on 64 bit vectors, so we correct for that. */
31739 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31740 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31741
31742 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31743
31744 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31745 odd = 0;
31746 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31747 odd = 1;
31748 else
31749 return false;
31750 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31751
31752 for (i = 0; i < nelt; i++)
31753 {
31754 unsigned elt =
31755 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31756 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31757 return false;
31758 }
31759
31760 /* Success! */
31761 if (d->testing_p)
31762 return true;
31763
31764 in0 = d->op0;
31765 in1 = d->op1;
31766 if (swap_nelt != 0)
31767 std::swap (in0, in1);
31768
31769 out0 = d->target;
31770 out1 = gen_reg_rtx (d->vmode);
31771 if (odd)
31772 std::swap (out0, out1);
31773
31774 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31775 return true;
31776 }
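/* For example (little-endian), a two-operand V8QImode permutation selecting
   the even elements {0, 2, 4, 6, 8, 10, 12, 14} is recognised here as a
   VUZP with odd == 0.  */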
31777
31778 /* Recognize patterns for the VZIP insns. */
31779
31780 static bool
31781 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31782 {
31783 unsigned int i, high, mask, nelt = d->perm.length ();
31784 rtx out0, out1, in0, in1;
31785 int first_elem;
31786 bool is_swapped;
31787
31788 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31789 return false;
31790
31791 is_swapped = BYTES_BIG_ENDIAN;
31792
31793 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31794
31795 high = nelt / 2;
31796 if (first_elem == neon_endian_lane_map (d->vmode, high))
31797 ;
31798 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31799 high = 0;
31800 else
31801 return false;
31802 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31803
31804 for (i = 0; i < nelt / 2; i++)
31805 {
31806 unsigned elt =
31807 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31808 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31809 != elt)
31810 return false;
31811 elt =
31812 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31813 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31814 != elt)
31815 return false;
31816 }
31817
31818 /* Success! */
31819 if (d->testing_p)
31820 return true;
31821
31822 in0 = d->op0;
31823 in1 = d->op1;
31824 if (is_swapped)
31825 std::swap (in0, in1);
31826
31827 out0 = d->target;
31828 out1 = gen_reg_rtx (d->vmode);
31829 if (high)
31830 std::swap (out0, out1);
31831
31832 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31833 return true;
31834 }
31835
31836 /* Recognize patterns for the VREV insns. */
31837 static bool
31838 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31839 {
31840 unsigned int i, j, diff, nelt = d->perm.length ();
31841 rtx (*gen) (machine_mode, rtx, rtx);
31842
31843 if (!d->one_vector_p)
31844 return false;
31845
31846 diff = d->perm[0];
31847 switch (diff)
31848 {
31849 case 7:
31850 switch (d->vmode)
31851 {
31852 case E_V16QImode:
31853 case E_V8QImode:
31854 gen = gen_neon_vrev64;
31855 break;
31856 default:
31857 return false;
31858 }
31859 break;
31860 case 3:
31861 switch (d->vmode)
31862 {
31863 case E_V16QImode:
31864 case E_V8QImode:
31865 gen = gen_neon_vrev32;
31866 break;
31867 case E_V8HImode:
31868 case E_V4HImode:
31869 case E_V8HFmode:
31870 case E_V4HFmode:
31871 gen = gen_neon_vrev64;
31872 break;
31873 default:
31874 return false;
31875 }
31876 break;
31877 case 1:
31878 switch (d->vmode)
31879 {
31880 case E_V16QImode:
31881 case E_V8QImode:
31882 gen = gen_neon_vrev16;
31883 break;
31884 case E_V8HImode:
31885 case E_V4HImode:
31886 gen = gen_neon_vrev32;
31887 break;
31888 case E_V4SImode:
31889 case E_V2SImode:
31890 case E_V4SFmode:
31891 case E_V2SFmode:
31892 gen = gen_neon_vrev64;
31893 break;
31894 default:
31895 return false;
31896 }
31897 break;
31898 default:
31899 return false;
31900 }
31901
31902 for (i = 0; i < nelt ; i += diff + 1)
31903 for (j = 0; j <= diff; j += 1)
31904 {
31905 /* This is guaranteed to be true as the value of diff
31906 is 7, 3, 1 and we should have enough elements in the
31907 queue to generate this. Getting a vector mask with a
31908 value of diff other than these values implies that
31909 something is wrong by the time we get here. */
31910 gcc_assert (i + j < nelt);
31911 if (d->perm[i + j] != i + diff - j)
31912 return false;
31913 }
31914
31915 /* Success! */
31916 if (d->testing_p)
31917 return true;
31918
31919 emit_insn (gen (d->vmode, d->target, d->op0));
31920 return true;
31921 }
31922
31923 /* Recognize patterns for the VTRN insns. */
31924
31925 static bool
31926 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31927 {
31928 unsigned int i, odd, mask, nelt = d->perm.length ();
31929 rtx out0, out1, in0, in1;
31930
31931 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31932 return false;
31933
31934 /* Note that these are little-endian tests. Adjust for big-endian later. */
31935 if (d->perm[0] == 0)
31936 odd = 0;
31937 else if (d->perm[0] == 1)
31938 odd = 1;
31939 else
31940 return false;
31941 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31942
31943 for (i = 0; i < nelt; i += 2)
31944 {
31945 if (d->perm[i] != i + odd)
31946 return false;
31947 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31948 return false;
31949 }
31950
31951 /* Success! */
31952 if (d->testing_p)
31953 return true;
31954
31955 in0 = d->op0;
31956 in1 = d->op1;
31957 if (BYTES_BIG_ENDIAN)
31958 {
31959 std::swap (in0, in1);
31960 odd = !odd;
31961 }
31962
31963 out0 = d->target;
31964 out1 = gen_reg_rtx (d->vmode);
31965 if (odd)
31966 std::swap (out0, out1);
31967
31968 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31969 return true;
31970 }
31971
31972 /* Recognize patterns for the VEXT insns. */
31973
31974 static bool
31975 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31976 {
31977 unsigned int i, nelt = d->perm.length ();
31978 rtx offset;
31979
31980 unsigned int location;
31981
31982 unsigned int next = d->perm[0] + 1;
31983
31984 /* TODO: Handle GCC's numbering of elements for big-endian. */
31985 if (BYTES_BIG_ENDIAN)
31986 return false;
31987
31988 /* Check if the extracted indexes are increasing by one. */
31989 for (i = 1; i < nelt; next++, i++)
31990 {
31991 /* If we hit the most significant element of the 2nd vector in
31992 the previous iteration, no need to test further. */
31993 if (next == 2 * nelt)
31994 return false;
31995
31996 /* If we are operating on only one vector: it could be a
31997 rotation. If there are only two elements of size < 64, let
31998 arm_evpc_neon_vrev catch it. */
31999 if (d->one_vector_p && (next == nelt))
32000 {
32001 if ((nelt == 2) && (d->vmode != V2DImode))
32002 return false;
32003 else
32004 next = 0;
32005 }
32006
32007 if (d->perm[i] != next)
32008 return false;
32009 }
32010
32011 location = d->perm[0];
32012
32013 /* Success! */
32014 if (d->testing_p)
32015 return true;
32016
32017 offset = GEN_INT (location);
32018
32019 if (d->vmode == E_DImode)
32020 return false;
32021
32022 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32023 return true;
32024 }
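/* For example (little-endian), a two-operand V4SImode permutation with
   selector {1, 2, 3, 4} has consecutive indices starting at 1 and is
   matched here as a VEXT with offset #1.  */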
32025
32026 /* The NEON VTBL instruction is a fully variable permutation that's even
32027 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32028 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32029 can do slightly better by expanding this as a constant where we don't
32030 have to apply a mask. */
32031
32032 static bool
32033 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32034 {
32035 rtx rperm[MAX_VECT_LEN], sel;
32036 machine_mode vmode = d->vmode;
32037 unsigned int i, nelt = d->perm.length ();
32038
32039 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32040 numbering of elements for big-endian, we must reverse the order. */
32041 if (BYTES_BIG_ENDIAN)
32042 return false;
32043
32044 if (d->testing_p)
32045 return true;
32046
32047 /* Generic code will try constant permutation twice: once with the
32048 original mode and again with the elements lowered to QImode.
32049 So wait and don't do the selector expansion ourselves. */
32050 if (vmode != V8QImode && vmode != V16QImode)
32051 return false;
32052
32053 for (i = 0; i < nelt; ++i)
32054 rperm[i] = GEN_INT (d->perm[i]);
32055 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32056 sel = force_reg (vmode, sel);
32057
32058 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32059 return true;
32060 }
32061
32062 static bool
32063 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32064 {
32065 /* Check if the input mask matches vext before reordering the
32066 operands. */
32067 if (TARGET_NEON)
32068 if (arm_evpc_neon_vext (d))
32069 return true;
32070
32071 /* The pattern matching functions above are written to look for a small
32072 number to begin the sequence (0, 1, N/2). If we begin with an index
32073 from the second operand, we can swap the operands. */
32074 unsigned int nelt = d->perm.length ();
32075 if (d->perm[0] >= nelt)
32076 {
32077 d->perm.rotate_inputs (1);
32078 std::swap (d->op0, d->op1);
32079 }
32080
32081 if (TARGET_NEON)
32082 {
32083 if (arm_evpc_neon_vuzp (d))
32084 return true;
32085 if (arm_evpc_neon_vzip (d))
32086 return true;
32087 if (arm_evpc_neon_vrev (d))
32088 return true;
32089 if (arm_evpc_neon_vtrn (d))
32090 return true;
32091 return arm_evpc_neon_vtbl (d);
32092 }
32093 return false;
32094 }
32095
32096 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32097
32098 static bool
32099 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32100 rtx target, rtx op0, rtx op1,
32101 const vec_perm_indices &sel)
32102 {
32103 if (vmode != op_mode)
32104 return false;
32105
32106 struct expand_vec_perm_d d;
32107 int i, nelt, which;
32108
32109 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32110 return false;
32111
32112 d.target = target;
32113 if (op0)
32114 {
32115 rtx nop0 = force_reg (vmode, op0);
32116 if (op0 == op1)
32117 op1 = nop0;
32118 op0 = nop0;
32119 }
32120 if (op1)
32121 op1 = force_reg (vmode, op1);
32122 d.op0 = op0;
32123 d.op1 = op1;
32124
32125 d.vmode = vmode;
32126 gcc_assert (VECTOR_MODE_P (d.vmode));
32127 d.testing_p = !target;
32128
32129 nelt = GET_MODE_NUNITS (d.vmode);
32130 for (i = which = 0; i < nelt; ++i)
32131 {
32132 int ei = sel[i] & (2 * nelt - 1);
32133 which |= (ei < nelt ? 1 : 2);
32134 }
32135
32136 switch (which)
32137 {
32138 default:
32139 gcc_unreachable();
32140
32141 case 3:
32142 d.one_vector_p = false;
32143 if (d.testing_p || !rtx_equal_p (op0, op1))
32144 break;
32145
32146 /* The elements of PERM do not suggest that only the first operand
32147 is used, but both operands are identical. Allow easier matching
32148 of the permutation by folding the permutation into the single
32149 input vector. */
32150 /* FALLTHRU */
32151 case 2:
32152 d.op0 = op1;
32153 d.one_vector_p = true;
32154 break;
32155
32156 case 1:
32157 d.op1 = op0;
32158 d.one_vector_p = true;
32159 break;
32160 }
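/* Illustrative example: for V4SImode with SEL = { 5, 4, 7, 6 } every index
   selects from the second vector, so WHICH == 2 above; the permutation is
   then treated as the single-input permutation { 1, 0, 3, 2 } applied to
   the second operand, which the vrev matcher can recognise. */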
32161
32162 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32163
32164 if (!d.testing_p)
32165 return arm_expand_vec_perm_const_1 (&d);
32166
32167 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32168 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32169 if (!d.one_vector_p)
32170 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32171
32172 start_sequence ();
32173 bool ret = arm_expand_vec_perm_const_1 (&d);
32174 end_sequence ();
32175
32176 return ret;
32177 }
32178
32179 bool
32180 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32181 {
32182 /* If we are soft float and we do not have ldrd
32183 then all auto increment forms are ok. */
32184 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32185 return true;
32186
32187 switch (code)
32188 {
32189 /* Post increment and Pre Decrement are supported for all
32190 instruction forms except for vector forms. */
32191 case ARM_POST_INC:
32192 case ARM_PRE_DEC:
32193 if (VECTOR_MODE_P (mode))
32194 {
32195 if (code != ARM_PRE_DEC)
32196 return true;
32197 else
32198 return false;
32199 }
32200
32201 return true;
32202
32203 case ARM_POST_DEC:
32204 case ARM_PRE_INC:
32205 /* Without LDRD and mode size greater than
32206 word size, there is no point in auto-incrementing
32207 because ldm and stm will not have these forms. */
32208 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32209 return false;
32210
32211 /* Vector and floating point modes do not support
32212 these auto increment forms. */
32213 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32214 return false;
32215
32216 return true;
32217
32218 default:
32219 return false;
32220
32221 }
32222
32223 return false;
32224 }
32225
32226 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32227 on ARM, since we know that shifts by negative amounts are no-ops.
32228 Additionally, the default expansion code is not available or suitable
32229 for post-reload insn splits (this can occur when the register allocator
32230 chooses not to do a shift in NEON).
32231
32232 This function is used in both initial expand and post-reload splits, and
32233 handles all kinds of 64-bit shifts.
32234
32235 Input requirements:
32236 - It is safe for the input and output to be the same register, but
32237 early-clobber rules apply for the shift amount and scratch registers.
32238 - Shift by register requires both scratch registers. In all other cases
32239 the scratch registers may be NULL.
32240 - Ashiftrt by a register also clobbers the CC register. */
32241 void
32242 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32243 rtx amount, rtx scratch1, rtx scratch2)
32244 {
32245 rtx out_high = gen_highpart (SImode, out);
32246 rtx out_low = gen_lowpart (SImode, out);
32247 rtx in_high = gen_highpart (SImode, in);
32248 rtx in_low = gen_lowpart (SImode, in);
32249
32250 /* Terminology:
32251 in = the register pair containing the input value.
32252 out = the destination register pair.
32253 up = the high- or low-part of each pair.
32254 down = the opposite part to "up".
32255 In a shift, we can consider bits to shift from "up"-stream to
32256 "down"-stream, so in a left-shift "up" is the low-part and "down"
32257 is the high-part of each register pair. */
32258
32259 rtx out_up = code == ASHIFT ? out_low : out_high;
32260 rtx out_down = code == ASHIFT ? out_high : out_low;
32261 rtx in_up = code == ASHIFT ? in_low : in_high;
32262 rtx in_down = code == ASHIFT ? in_high : in_low;
32263
32264 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32265 gcc_assert (out
32266 && (REG_P (out) || SUBREG_P (out))
32267 && GET_MODE (out) == DImode);
32268 gcc_assert (in
32269 && (REG_P (in) || SUBREG_P (in))
32270 && GET_MODE (in) == DImode);
32271 gcc_assert (amount
32272 && (((REG_P (amount) || SUBREG_P (amount))
32273 && GET_MODE (amount) == SImode)
32274 || CONST_INT_P (amount)));
32275 gcc_assert (scratch1 == NULL
32276 || (GET_CODE (scratch1) == SCRATCH)
32277 || (GET_MODE (scratch1) == SImode
32278 && REG_P (scratch1)));
32279 gcc_assert (scratch2 == NULL
32280 || (GET_CODE (scratch2) == SCRATCH)
32281 || (GET_MODE (scratch2) == SImode
32282 && REG_P (scratch2)));
32283 gcc_assert (!REG_P (out) || !REG_P (amount)
32284 || !HARD_REGISTER_P (out)
32285 || (REGNO (out) != REGNO (amount)
32286 && REGNO (out) + 1 != REGNO (amount)));
32287
32288 /* Macros to make following code more readable. */
32289 #define SUB_32(DEST,SRC) \
32290 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32291 #define RSB_32(DEST,SRC) \
32292 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32293 #define SUB_S_32(DEST,SRC) \
32294 gen_addsi3_compare0 ((DEST), (SRC), \
32295 GEN_INT (-32))
32296 #define SET(DEST,SRC) \
32297 gen_rtx_SET ((DEST), (SRC))
32298 #define SHIFT(CODE,SRC,AMOUNT) \
32299 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32300 #define LSHIFT(CODE,SRC,AMOUNT) \
32301 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32302 SImode, (SRC), (AMOUNT))
32303 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32304 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32305 SImode, (SRC), (AMOUNT))
32306 #define ORR(A,B) \
32307 gen_rtx_IOR (SImode, (A), (B))
32308 #define BRANCH(COND,LABEL) \
32309 gen_arm_cond_branch ((LABEL), \
32310 gen_rtx_ ## COND (CCmode, cc_reg, \
32311 const0_rtx), \
32312 cc_reg)
32313
32314 /* Shifts by register and shifts by constant are handled separately. */
32315 if (CONST_INT_P (amount))
32316 {
32317 /* We have a shift-by-constant. */
32318
32319 /* First, handle out-of-range shift amounts.
32320 In both cases we try to match the result an ARM instruction in a
32321 shift-by-register would give. This helps reduce execution
32322 differences between optimization levels, but it won't stop other
32323 parts of the compiler doing different things. This is undefined
32324 behavior, in any case. */
32325 if (INTVAL (amount) <= 0)
32326 emit_insn (gen_movdi (out, in));
32327 else if (INTVAL (amount) >= 64)
32328 {
32329 if (code == ASHIFTRT)
32330 {
32331 rtx const31_rtx = GEN_INT (31);
32332 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32333 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32334 }
32335 else
32336 emit_insn (gen_movdi (out, const0_rtx));
32337 }
32338
32339 /* Now handle valid shifts. */
32340 else if (INTVAL (amount) < 32)
32341 {
32342 /* Shifts by a constant less than 32. */
32343 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32344
32345 /* Clearing the out register in DImode first avoids lots
32346 of spilling and results in less stack usage.
32347 Later this redundant insn is completely removed.
32348 Do that only if "in" and "out" are different registers. */
32349 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32350 emit_insn (SET (out, const0_rtx));
32351 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32352 emit_insn (SET (out_down,
32353 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32354 out_down)));
32355 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32356 }
32357 else
32358 {
32359 /* Shifts by a constant greater than 31. */
32360 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32361
32362 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32363 emit_insn (SET (out, const0_rtx));
32364 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32365 if (code == ASHIFTRT)
32366 emit_insn (gen_ashrsi3 (out_up, in_up,
32367 GEN_INT (31)));
32368 else
32369 emit_insn (SET (out_up, const0_rtx));
32370 }
32371 }
32372 else
32373 {
32374 /* We have a shift-by-register. */
32375 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32376
32377 /* This alternative requires the scratch registers. */
32378 gcc_assert (scratch1 && REG_P (scratch1));
32379 gcc_assert (scratch2 && REG_P (scratch2));
32380
32381 /* We will need the values "amount-32" and "32-amount" later.
32382 Swapping them around now allows the later code to be more general. */
32383 switch (code)
32384 {
32385 case ASHIFT:
32386 emit_insn (SUB_32 (scratch1, amount));
32387 emit_insn (RSB_32 (scratch2, amount));
32388 break;
32389 case ASHIFTRT:
32390 emit_insn (RSB_32 (scratch1, amount));
32391 /* Also set CC = amount > 32. */
32392 emit_insn (SUB_S_32 (scratch2, amount));
32393 break;
32394 case LSHIFTRT:
32395 emit_insn (RSB_32 (scratch1, amount));
32396 emit_insn (SUB_32 (scratch2, amount));
32397 break;
32398 default:
32399 gcc_unreachable ();
32400 }
32401
32402 /* Emit code like this:
32403
32404 arithmetic-left:
32405 out_down = in_down << amount;
32406 out_down = (in_up << (amount - 32)) | out_down;
32407 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32408 out_up = in_up << amount;
32409
32410 arithmetic-right:
32411 out_down = in_down >> amount;
32412 out_down = (in_up << (32 - amount)) | out_down;
32413 if (amount < 32)
32414 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32415 out_up = in_up >> amount;
32416
32417 logical-right:
32418 out_down = in_down >> amount;
32419 out_down = (in_up << (32 - amount)) | out_down;
32420 if (amount < 32)
32421 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32422 out_up = in_up >> amount;
32423
32424 The ARM and Thumb2 variants are the same but implemented slightly
32425 differently. If this were only called during expand we could just
32426 use the Thumb2 case and let combine do the right thing, but this
32427 can also be called from post-reload splitters. */
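/* As a rough illustration (the register assignment here is assumed, not
   fixed by this function): a 64-bit left shift of r1:r0 by the amount in
   r2 into r5:r4, with scratches r3 and ip, comes out in ARM mode as
   something like:

   sub r3, r2, #32 @ scratch1 = amount - 32
   rsb ip, r2, #32 @ scratch2 = 32 - amount
   lsl r5, r1, r2 @ out_down = in_down << amount
   orr r5, r5, r0, lsl r3 @ |= in_up << (amount - 32)
   orr r5, r5, r0, lsr ip @ |= (unsigned) in_up >> (32 - amount)
   lsl r4, r0, r2 @ out_up = in_up << amount

   Thumb-2 cannot fold a register-controlled shift into ORR, so the two
   middle steps become separate shift and ORR instructions, as handled
   below. */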
32428
32429 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32430
32431 if (!TARGET_THUMB2)
32432 {
32433 /* Emit code for ARM mode. */
32434 emit_insn (SET (out_down,
32435 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32436 if (code == ASHIFTRT)
32437 {
32438 rtx_code_label *done_label = gen_label_rtx ();
32439 emit_jump_insn (BRANCH (LT, done_label));
32440 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32441 out_down)));
32442 emit_label (done_label);
32443 }
32444 else
32445 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32446 out_down)));
32447 }
32448 else
32449 {
32450 /* Emit code for Thumb2 mode.
32451 Thumb2 can't do shift and or in one insn. */
32452 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32453 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32454
32455 if (code == ASHIFTRT)
32456 {
32457 rtx_code_label *done_label = gen_label_rtx ();
32458 emit_jump_insn (BRANCH (LT, done_label));
32459 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32460 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32461 emit_label (done_label);
32462 }
32463 else
32464 {
32465 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32466 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32467 }
32468 }
32469
32470 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32471 }
32472
32473 #undef SUB_32
32474 #undef RSB_32
32475 #undef SUB_S_32
32476 #undef SET
32477 #undef SHIFT
32478 #undef LSHIFT
32479 #undef REV_LSHIFT
32480 #undef ORR
32481 #undef BRANCH
32482 }
32483
32484 /* Returns true if the pattern is a valid symbolic address, which is either a
32485 symbol_ref or (symbol_ref + addend).
32486
32487 According to the ARM ELF ABI, the initial addend of REL-type relocations
32488 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32489 literal field of the instruction as a 16-bit signed value in the range
32490 -32768 <= A < 32768.
32491
32492 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32493 unsigned range of 0 <= A < 256 as described in the AAELF32
32494 relocation handling documentation: REL-type relocations are encoded
32495 as unsigned in this case. */
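/* For instance (illustrative addresses): (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 100))) are accepted, while an
   addend of 0x12345 is rejected because it does not fit the signed 16-bit
   (or, for Thumb-1 without MOVT, unsigned 8-bit) range checked below. */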
32496
32497 bool
32498 arm_valid_symbolic_address_p (rtx addr)
32499 {
32500 rtx xop0, xop1 = NULL_RTX;
32501 rtx tmp = addr;
32502
32503 if (target_word_relocations)
32504 return false;
32505
32506 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32507 return true;
32508
32509 /* (const (plus: symbol_ref const_int)) */
32510 if (GET_CODE (addr) == CONST)
32511 tmp = XEXP (addr, 0);
32512
32513 if (GET_CODE (tmp) == PLUS)
32514 {
32515 xop0 = XEXP (tmp, 0);
32516 xop1 = XEXP (tmp, 1);
32517
32518 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32519 {
32520 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32521 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32522 else
32523 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32524 }
32525 }
32526
32527 return false;
32528 }
32529
32530 /* Returns true if COMPARISON is a valid comparison operation, and
32531 puts the operands into a form that is valid for it. */
32532 bool
32533 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32534 {
32535 enum rtx_code code = GET_CODE (*comparison);
32536 int code_int;
32537 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32538 ? GET_MODE (*op2) : GET_MODE (*op1);
32539
32540 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32541
32542 if (code == UNEQ || code == LTGT)
32543 return false;
32544
32545 code_int = (int)code;
32546 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32547 PUT_CODE (*comparison, (enum rtx_code)code_int);
32548
32549 switch (mode)
32550 {
32551 case E_SImode:
32552 if (!arm_add_operand (*op1, mode))
32553 *op1 = force_reg (mode, *op1);
32554 if (!arm_add_operand (*op2, mode))
32555 *op2 = force_reg (mode, *op2);
32556 return true;
32557
32558 case E_DImode:
32559 /* gen_compare_reg() will sort out any invalid operands. */
32560 return true;
32561
32562 case E_HFmode:
32563 if (!TARGET_VFP_FP16INST)
32564 break;
32565 /* FP16 comparisons are done in SF mode. */
32566 mode = SFmode;
32567 *op1 = convert_to_mode (mode, *op1, 1);
32568 *op2 = convert_to_mode (mode, *op2, 1);
32569 /* Fall through. */
32570 case E_SFmode:
32571 case E_DFmode:
32572 if (!vfp_compare_operand (*op1, mode))
32573 *op1 = force_reg (mode, *op1);
32574 if (!vfp_compare_operand (*op2, mode))
32575 *op2 = force_reg (mode, *op2);
32576 return true;
32577 default:
32578 break;
32579 }
32580
32581 return false;
32582
32583 }
32584
32585 /* Maximum number of instructions to set block of memory. */
32586 static int
32587 arm_block_set_max_insns (void)
32588 {
32589 if (optimize_function_for_size_p (cfun))
32590 return 4;
32591 else
32592 return current_tune->max_insns_inline_memset;
32593 }
32594
32595 /* Return TRUE if it's profitable to set block of memory for
32596 non-vectorized case. VAL is the value to set the memory
32597 with. LENGTH is the number of bytes to set. ALIGN is the
32598 alignment of the destination memory in bytes. UNALIGNED_P
32599 is TRUE if we can only set the memory with instructions
32600 meeting alignment requirements. USE_STRD_P is TRUE if we
32601 can use strd to set the memory. */
32602 static bool
32603 arm_block_set_non_vect_profit_p (rtx val,
32604 unsigned HOST_WIDE_INT length,
32605 unsigned HOST_WIDE_INT align,
32606 bool unaligned_p, bool use_strd_p)
32607 {
32608 int num = 0;
32609 /* For leftovers in bytes of 0-7, we can set the memory block using
32610 strb/strh/str with minimum instruction number. */
32611 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
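/* For example, leftover[5] == 2: a 5-byte tail can be stored with one STR
   plus one STRB, while leftover[7] == 3 needs STR + STRH + STRB. */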
32612
32613 if (unaligned_p)
32614 {
32615 num = arm_const_inline_cost (SET, val);
32616 num += length / align + length % align;
32617 }
32618 else if (use_strd_p)
32619 {
32620 num = arm_const_double_inline_cost (val);
32621 num += (length >> 3) + leftover[length & 7];
32622 }
32623 else
32624 {
32625 num = arm_const_inline_cost (SET, val);
32626 num += (length >> 2) + leftover[length & 3];
32627 }
32628
32629 /* We may be able to combine last pair STRH/STRB into a single STR
32630 by shifting one byte back. */
32631 if (unaligned_access && length > 3 && (length & 3) == 3)
32632 num--;
32633
32634 return (num <= arm_block_set_max_insns ());
32635 }
32636
32637 /* Return TRUE if it's profitable to set block of memory for
32638 vectorized case. LENGTH is the number of bytes to set.
32639 ALIGN is the alignment of destination memory in bytes.
32640 MODE is the vector mode used to set the memory. */
32641 static bool
32642 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32643 unsigned HOST_WIDE_INT align,
32644 machine_mode mode)
32645 {
32646 int num;
32647 bool unaligned_p = ((align & 3) != 0);
32648 unsigned int nelt = GET_MODE_NUNITS (mode);
32649
32650 /* Instruction loading constant value. */
32651 num = 1;
32652 /* Instructions storing the memory. */
32653 num += (length + nelt - 1) / nelt;
32654 /* Instructions adjusting the address expression. Only need to
32655 adjust address expression if it's 4 bytes aligned and bytes
32656 leftover can only be stored by mis-aligned store instruction. */
32657 if (!unaligned_p && (length & 3) != 0)
32658 num++;
32659
32660 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32661 if (!unaligned_p && mode == V16QImode)
32662 num--;
32663
32664 return (num <= arm_block_set_max_insns ());
32665 }
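/* Worked example (figures are illustrative): LENGTH == 17 with a
   word-aligned destination and MODE == V16QImode gives num = 1 (load the
   constant) + 2 (stores) + 1 (address adjustment for the tail) - 1 (the
   first 16 bytes use vst1), i.e. 3 instructions, which is within the
   usual limit. */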
32666
32667 /* Set a block of memory using vectorization instructions for the
32668 unaligned case. We fill the first LENGTH bytes of the memory
32669 area starting from DSTBASE with byte constant VALUE. ALIGN is
32670 the alignment requirement of memory. Return TRUE if succeeded. */
32671 static bool
32672 arm_block_set_unaligned_vect (rtx dstbase,
32673 unsigned HOST_WIDE_INT length,
32674 unsigned HOST_WIDE_INT value,
32675 unsigned HOST_WIDE_INT align)
32676 {
32677 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32678 rtx dst, mem;
32679 rtx val_vec, reg;
32680 rtx (*gen_func) (rtx, rtx);
32681 machine_mode mode;
32682 unsigned HOST_WIDE_INT v = value;
32683 unsigned int offset = 0;
32684 gcc_assert ((align & 0x3) != 0);
32685 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32686 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32687 if (length >= nelt_v16)
32688 {
32689 mode = V16QImode;
32690 gen_func = gen_movmisalignv16qi;
32691 }
32692 else
32693 {
32694 mode = V8QImode;
32695 gen_func = gen_movmisalignv8qi;
32696 }
32697 nelt_mode = GET_MODE_NUNITS (mode);
32698 gcc_assert (length >= nelt_mode);
32699 /* Skip if it isn't profitable. */
32700 if (!arm_block_set_vect_profit_p (length, align, mode))
32701 return false;
32702
32703 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32704 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32705
32706 v = sext_hwi (v, BITS_PER_WORD);
32707
32708 reg = gen_reg_rtx (mode);
32709 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32710 /* Emit instruction loading the constant value. */
32711 emit_move_insn (reg, val_vec);
32712
32713 /* Handle nelt_mode bytes in a vector. */
32714 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32715 {
32716 emit_insn ((*gen_func) (mem, reg));
32717 if (i + 2 * nelt_mode <= length)
32718 {
32719 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32720 offset += nelt_mode;
32721 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32722 }
32723 }
32724
32725 /* If at least nelt_v8 bytes are left over, we must be in
32726 V16QI mode. */
32727 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32728
32729 /* Handle (8, 16) bytes leftover. */
32730 if (i + nelt_v8 < length)
32731 {
32732 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32733 offset += length - i;
32734 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32735
32736 /* We are shifting bytes back, set the alignment accordingly. */
32737 if ((length & 1) != 0 && align >= 2)
32738 set_mem_align (mem, BITS_PER_UNIT);
32739
32740 emit_insn (gen_movmisalignv16qi (mem, reg));
32741 }
32742 /* Handle (0, 8] bytes leftover. */
32743 else if (i < length && i + nelt_v8 >= length)
32744 {
32745 if (mode == V16QImode)
32746 reg = gen_lowpart (V8QImode, reg);
32747
32748 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32749 + (nelt_mode - nelt_v8))));
32750 offset += (length - i) + (nelt_mode - nelt_v8);
32751 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32752
32753 /* We are shifting bytes back, set the alignment accordingly. */
32754 if ((length & 1) != 0 && align >= 2)
32755 set_mem_align (mem, BITS_PER_UNIT);
32756
32757 emit_insn (gen_movmisalignv8qi (mem, reg));
32758 }
32759
32760 return true;
32761 }
32762
32763 /* Set a block of memory using vectorization instructions for the
32764 aligned case. We fill the first LENGTH bytes of the memory area
32765 starting from DSTBASE with byte constant VALUE. ALIGN is the
32766 alignment requirement of memory. Return TRUE if succeeded. */
32767 static bool
32768 arm_block_set_aligned_vect (rtx dstbase,
32769 unsigned HOST_WIDE_INT length,
32770 unsigned HOST_WIDE_INT value,
32771 unsigned HOST_WIDE_INT align)
32772 {
32773 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32774 rtx dst, addr, mem;
32775 rtx val_vec, reg;
32776 machine_mode mode;
32777 unsigned int offset = 0;
32778
32779 gcc_assert ((align & 0x3) == 0);
32780 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32781 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32782 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32783 mode = V16QImode;
32784 else
32785 mode = V8QImode;
32786
32787 nelt_mode = GET_MODE_NUNITS (mode);
32788 gcc_assert (length >= nelt_mode);
32789 /* Skip if it isn't profitable. */
32790 if (!arm_block_set_vect_profit_p (length, align, mode))
32791 return false;
32792
32793 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32794
32795 reg = gen_reg_rtx (mode);
32796 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32797 /* Emit instruction loading the constant value. */
32798 emit_move_insn (reg, val_vec);
32799
32800 i = 0;
32801 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32802 if (mode == V16QImode)
32803 {
32804 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32805 emit_insn (gen_movmisalignv16qi (mem, reg));
32806 i += nelt_mode;
32807 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32808 if (i + nelt_v8 < length && i + nelt_v16 > length)
32809 {
32810 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32811 offset += length - nelt_mode;
32812 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32813 /* We are shifting bytes back, set the alignment accordingly. */
32814 if ((length & 0x3) == 0)
32815 set_mem_align (mem, BITS_PER_UNIT * 4);
32816 else if ((length & 0x1) == 0)
32817 set_mem_align (mem, BITS_PER_UNIT * 2);
32818 else
32819 set_mem_align (mem, BITS_PER_UNIT);
32820
32821 emit_insn (gen_movmisalignv16qi (mem, reg));
32822 return true;
32823 }
32824 /* Fall through for bytes leftover. */
32825 mode = V8QImode;
32826 nelt_mode = GET_MODE_NUNITS (mode);
32827 reg = gen_lowpart (V8QImode, reg);
32828 }
32829
32830 /* Handle 8 bytes in a vector. */
32831 for (; (i + nelt_mode <= length); i += nelt_mode)
32832 {
32833 addr = plus_constant (Pmode, dst, i);
32834 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32835 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32836 emit_move_insn (mem, reg);
32837 else
32838 emit_insn (gen_unaligned_storev8qi (mem, reg));
32839 }
32840
32841 /* Handle single word leftover by shifting 4 bytes back. We can
32842 use aligned access for this case. */
32843 if (i + UNITS_PER_WORD == length)
32844 {
32845 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32846 offset += i - UNITS_PER_WORD;
32847 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32848 /* We are shifting 4 bytes back, set the alignment accordingly. */
32849 if (align > UNITS_PER_WORD)
32850 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32851
32852 emit_insn (gen_unaligned_storev8qi (mem, reg));
32853 }
32854 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32855 We have to use unaligned access for this case. */
32856 else if (i < length)
32857 {
32858 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32859 offset += length - nelt_mode;
32860 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32861 /* We are shifting bytes back, set the alignment accordingly. */
32862 if ((length & 1) == 0)
32863 set_mem_align (mem, BITS_PER_UNIT * 2);
32864 else
32865 set_mem_align (mem, BITS_PER_UNIT);
32866
32867 emit_insn (gen_movmisalignv8qi (mem, reg));
32868 }
32869
32870 return true;
32871 }
32872
32873 /* Set a block of memory using plain strh/strb instructions, only
32874 using instructions allowed by ALIGN on the processor. We fill the
32875 first LENGTH bytes of the memory area starting from DSTBASE
32876 with byte constant VALUE. ALIGN is the alignment requirement
32877 of memory. */
32878 static bool
32879 arm_block_set_unaligned_non_vect (rtx dstbase,
32880 unsigned HOST_WIDE_INT length,
32881 unsigned HOST_WIDE_INT value,
32882 unsigned HOST_WIDE_INT align)
32883 {
32884 unsigned int i;
32885 rtx dst, addr, mem;
32886 rtx val_exp, val_reg, reg;
32887 machine_mode mode;
32888 HOST_WIDE_INT v = value;
32889
32890 gcc_assert (align == 1 || align == 2);
32891
32892 if (align == 2)
32893 v |= (value << BITS_PER_UNIT);
32894
32895 v = sext_hwi (v, BITS_PER_WORD);
32896 val_exp = GEN_INT (v);
32897 /* Skip if it isn't profitable. */
32898 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32899 align, true, false))
32900 return false;
32901
32902 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32903 mode = (align == 2 ? HImode : QImode);
32904 val_reg = force_reg (SImode, val_exp);
32905 reg = gen_lowpart (mode, val_reg);
32906
32907 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32908 {
32909 addr = plus_constant (Pmode, dst, i);
32910 mem = adjust_automodify_address (dstbase, mode, addr, i);
32911 emit_move_insn (mem, reg);
32912 }
32913
32914 /* Handle single byte leftover. */
32915 if (i + 1 == length)
32916 {
32917 reg = gen_lowpart (QImode, val_reg);
32918 addr = plus_constant (Pmode, dst, i);
32919 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32920 emit_move_insn (mem, reg);
32921 i++;
32922 }
32923
32924 gcc_assert (i == length);
32925 return true;
32926 }
32927
32928 /* Set a block of memory using plain strd/str/strh/strb instructions,
32929 to permit unaligned copies on processors which support unaligned
32930 semantics for those instructions. We fill the first LENGTH bytes
32931 of the memory area starting from DSTBASE with byte constant VALUE.
32932 ALIGN is the alignment requirement of memory. */
32933 static bool
32934 arm_block_set_aligned_non_vect (rtx dstbase,
32935 unsigned HOST_WIDE_INT length,
32936 unsigned HOST_WIDE_INT value,
32937 unsigned HOST_WIDE_INT align)
32938 {
32939 unsigned int i;
32940 rtx dst, addr, mem;
32941 rtx val_exp, val_reg, reg;
32942 unsigned HOST_WIDE_INT v;
32943 bool use_strd_p;
32944
32945 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32946 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32947
32948 v = (value | (value << 8) | (value << 16) | (value << 24));
32949 if (length < UNITS_PER_WORD)
32950 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32951
32952 if (use_strd_p)
32953 v |= (v << BITS_PER_WORD);
32954 else
32955 v = sext_hwi (v, BITS_PER_WORD);
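/* E.g. (illustrative): VALUE == 0xab is replicated to 0xabababab for
   word-sized stores, or to 0xabababababababab when STRD is usable. */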
32956
32957 val_exp = GEN_INT (v);
32958 /* Skip if it isn't profitable. */
32959 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32960 align, false, use_strd_p))
32961 {
32962 if (!use_strd_p)
32963 return false;
32964
32965 /* Try without strd. */
32966 v = (v >> BITS_PER_WORD);
32967 v = sext_hwi (v, BITS_PER_WORD);
32968 val_exp = GEN_INT (v);
32969 use_strd_p = false;
32970 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32971 align, false, use_strd_p))
32972 return false;
32973 }
32974
32975 i = 0;
32976 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32977 /* Handle double words using strd if possible. */
32978 if (use_strd_p)
32979 {
32980 val_reg = force_reg (DImode, val_exp);
32981 reg = val_reg;
32982 for (; (i + 8 <= length); i += 8)
32983 {
32984 addr = plus_constant (Pmode, dst, i);
32985 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32986 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32987 emit_move_insn (mem, reg);
32988 else
32989 emit_insn (gen_unaligned_storedi (mem, reg));
32990 }
32991 }
32992 else
32993 val_reg = force_reg (SImode, val_exp);
32994
32995 /* Handle words. */
32996 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32997 for (; (i + 4 <= length); i += 4)
32998 {
32999 addr = plus_constant (Pmode, dst, i);
33000 mem = adjust_automodify_address (dstbase, SImode, addr, i);
33001 if ((align & 3) == 0)
33002 emit_move_insn (mem, reg);
33003 else
33004 emit_insn (gen_unaligned_storesi (mem, reg));
33005 }
33006
33007 /* Merge last pair of STRH and STRB into a STR if possible. */
33008 if (unaligned_access && i > 0 && (i + 3) == length)
33009 {
33010 addr = plus_constant (Pmode, dst, i - 1);
33011 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33012 /* We are shifting one byte back, set the alignment accordingly. */
33013 if ((align & 1) == 0)
33014 set_mem_align (mem, BITS_PER_UNIT);
33015
33016 /* Most likely this is an unaligned access, and we can't tell at
33017 compilation time. */
33018 emit_insn (gen_unaligned_storesi (mem, reg));
33019 return true;
33020 }
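/* Illustrative case: LENGTH == 11 leaves i == 8 after the word loop, so a
   single unaligned STR at offset 7 covers bytes 7-10 (byte 7 is simply
   rewritten), saving the STRH + STRB pair. */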
33021
33022 /* Handle half word leftover. */
33023 if (i + 2 <= length)
33024 {
33025 reg = gen_lowpart (HImode, val_reg);
33026 addr = plus_constant (Pmode, dst, i);
33027 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33028 if ((align & 1) == 0)
33029 emit_move_insn (mem, reg);
33030 else
33031 emit_insn (gen_unaligned_storehi (mem, reg));
33032
33033 i += 2;
33034 }
33035
33036 /* Handle single byte leftover. */
33037 if (i + 1 == length)
33038 {
33039 reg = gen_lowpart (QImode, val_reg);
33040 addr = plus_constant (Pmode, dst, i);
33041 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33042 emit_move_insn (mem, reg);
33043 }
33044
33045 return true;
33046 }
33047
33048 /* Set a block of memory using vectorization instructions for both
33049 aligned and unaligned cases. We fill the first LENGTH bytes of
33050 the memory area starting from DSTBASE with byte constant VALUE.
33051 ALIGN is the alignment requirement of memory. */
33052 static bool
33053 arm_block_set_vect (rtx dstbase,
33054 unsigned HOST_WIDE_INT length,
33055 unsigned HOST_WIDE_INT value,
33056 unsigned HOST_WIDE_INT align)
33057 {
33058 /* Check whether we need to use unaligned store instruction. */
33059 if (((align & 3) != 0 || (length & 3) != 0)
33060 /* Check whether unaligned store instruction is available. */
33061 && (!unaligned_access || BYTES_BIG_ENDIAN))
33062 return false;
33063
33064 if ((align & 3) == 0)
33065 return arm_block_set_aligned_vect (dstbase, length, value, align);
33066 else
33067 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33068 }
33069
33070 /* Expand string store operation. First we try to do that by using
33071 vectorization instructions, then try with ARM unaligned access and
33072 double-word store if profitable. OPERANDS[0] is the destination,
33073 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33074 initialize the memory, OPERANDS[3] is the known alignment of the
33075 destination. */
33076 bool
33077 arm_gen_setmem (rtx *operands)
33078 {
33079 rtx dstbase = operands[0];
33080 unsigned HOST_WIDE_INT length;
33081 unsigned HOST_WIDE_INT value;
33082 unsigned HOST_WIDE_INT align;
33083
33084 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33085 return false;
33086
33087 length = UINTVAL (operands[1]);
33088 if (length > 64)
33089 return false;
33090
33091 value = (UINTVAL (operands[2]) & 0xFF);
33092 align = UINTVAL (operands[3]);
33093 if (TARGET_NEON && length >= 8
33094 && current_tune->string_ops_prefer_neon
33095 && arm_block_set_vect (dstbase, length, value, align))
33096 return true;
33097
33098 if (!unaligned_access && (align & 3) != 0)
33099 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33100
33101 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33102 }
33103
33104
33105 static bool
33106 arm_macro_fusion_p (void)
33107 {
33108 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33109 }
33110
33111 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33112 for MOVW / MOVT macro fusion. */
33113
33114 static bool
33115 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33116 {
33117 /* We are trying to fuse
33118 movw imm / movt imm
33119 instructions as a group that gets scheduled together. */
33120
33121 rtx set_dest = SET_DEST (curr_set);
33122
33123 if (GET_MODE (set_dest) != SImode)
33124 return false;
33125
33126 /* We are trying to match:
33127 prev (movw) == (set (reg r0) (const_int imm16))
33128 curr (movt) == (set (zero_extract (reg r0)
33129 (const_int 16)
33130 (const_int 16))
33131 (const_int imm16_1))
33132 or
33133 prev (movw) == (set (reg r1)
33134 (high (symbol_ref ("SYM"))))
33135 curr (movt) == (set (reg r0)
33136 (lo_sum (reg r1)
33137 (symbol_ref ("SYM")))) */
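/* In assembly terms, a fusible pair is e.g. (operands are illustrative):
   movw r0, #:lower16:sym
   movt r0, #:upper16:sym
   or a pair building a constant, such as movw r0, #0x1234 followed by
   movt r0, #0xabcd to form 0xabcd1234 in r0. */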
33138
33139 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33140 {
33141 if (CONST_INT_P (SET_SRC (curr_set))
33142 && CONST_INT_P (SET_SRC (prev_set))
33143 && REG_P (XEXP (set_dest, 0))
33144 && REG_P (SET_DEST (prev_set))
33145 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33146 return true;
33147
33148 }
33149 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33150 && REG_P (SET_DEST (curr_set))
33151 && REG_P (SET_DEST (prev_set))
33152 && GET_CODE (SET_SRC (prev_set)) == HIGH
33153 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33154 return true;
33155
33156 return false;
33157 }
33158
33159 static bool
33160 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33161 {
33162 rtx prev_set = single_set (prev);
33163 rtx curr_set = single_set (curr);
33164
33165 if (!prev_set
33166 || !curr_set)
33167 return false;
33168
33169 if (any_condjump_p (curr))
33170 return false;
33171
33172 if (!arm_macro_fusion_p ())
33173 return false;
33174
33175 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33176 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33177 return true;
33178
33179 return false;
33180 }
33181
33182 /* Return true iff the instruction fusion described by OP is enabled. */
33183 bool
33184 arm_fusion_enabled_p (tune_params::fuse_ops op)
33185 {
33186 return current_tune->fusible_ops & op;
33187 }
33188
33189 /* Return TRUE if return address signing mechanism is enabled. */
33190 bool
33191 arm_current_function_pac_enabled_p (void)
33192 {
33193 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33194 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33195 && !crtl->is_leaf));
33196 }
33197
33198 /* Raise an error if the current target arch is not bti compatible. */
33199 void aarch_bti_arch_check (void)
33200 {
33201 if (!arm_arch8m_main)
33202 error ("This architecture does not support branch protection instructions");
33203 }
33204
33205 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33206 bool
33207 aarch_bti_enabled (void)
33208 {
33209 return aarch_enable_bti != 0;
33210 }
33211
33212 /* Check if INSN is a BTI J insn. */
33213 bool
33214 aarch_bti_j_insn_p (rtx_insn *insn)
33215 {
33216 if (!insn || !INSN_P (insn))
33217 return false;
33218
33219 rtx pat = PATTERN (insn);
33220 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33221 }
33222
33223 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33224 bool
33225 aarch_pac_insn_p (rtx x)
33226 {
33227 if (!x || !INSN_P (x))
33228 return false;
33229
33230 rtx pat = PATTERN (x);
33231
33232 if (GET_CODE (pat) == SET)
33233 {
33234 rtx tmp = XEXP (pat, 1);
33235 if (tmp
33236 && ((GET_CODE (tmp) == UNSPEC
33237 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33238 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33239 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33240 return true;
33241 }
33242
33243 return false;
33244 }
33245
33246 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33247 For Arm, both of these map to a simple BTI instruction. */
33248
33249 rtx
33250 aarch_gen_bti_c (void)
33251 {
33252 return gen_bti_nop ();
33253 }
33254
33255 rtx
33256 aarch_gen_bti_j (void)
33257 {
33258 return gen_bti_nop ();
33259 }
33260
33261 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33262 scheduled for speculative execution. Reject the long-running division
33263 and square-root instructions. */
33264
33265 static bool
33266 arm_sched_can_speculate_insn (rtx_insn *insn)
33267 {
33268 switch (get_attr_type (insn))
33269 {
33270 case TYPE_SDIV:
33271 case TYPE_UDIV:
33272 case TYPE_FDIVS:
33273 case TYPE_FDIVD:
33274 case TYPE_FSQRTS:
33275 case TYPE_FSQRTD:
33276 case TYPE_NEON_FP_SQRT_S:
33277 case TYPE_NEON_FP_SQRT_D:
33278 case TYPE_NEON_FP_SQRT_S_Q:
33279 case TYPE_NEON_FP_SQRT_D_Q:
33280 case TYPE_NEON_FP_DIV_S:
33281 case TYPE_NEON_FP_DIV_D:
33282 case TYPE_NEON_FP_DIV_S_Q:
33283 case TYPE_NEON_FP_DIV_D_Q:
33284 return false;
33285 default:
33286 return true;
33287 }
33288 }
33289
33290 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33291
33292 static unsigned HOST_WIDE_INT
33293 arm_asan_shadow_offset (void)
33294 {
33295 return HOST_WIDE_INT_1U << 29;
33296 }
33297
33298
33299 /* This is a temporary fix for PR60655. Ideally we need
33300 to handle most of these cases in the generic part but
33301 currently we reject minus (..) (sym_ref). We try to
33302 ameliorate the case with minus (sym_ref1) (sym_ref2)
33303 where they are in the same section. */
33304
33305 static bool
33306 arm_const_not_ok_for_debug_p (rtx p)
33307 {
33308 tree decl_op0 = NULL;
33309 tree decl_op1 = NULL;
33310
33311 if (GET_CODE (p) == UNSPEC)
33312 return true;
33313 if (GET_CODE (p) == MINUS)
33314 {
33315 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33316 {
33317 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33318 if (decl_op1
33319 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33320 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33321 {
33322 if ((VAR_P (decl_op1)
33323 || TREE_CODE (decl_op1) == CONST_DECL)
33324 && (VAR_P (decl_op0)
33325 || TREE_CODE (decl_op0) == CONST_DECL))
33326 return (get_variable_section (decl_op1, false)
33327 != get_variable_section (decl_op0, false));
33328
33329 if (TREE_CODE (decl_op1) == LABEL_DECL
33330 && TREE_CODE (decl_op0) == LABEL_DECL)
33331 return (DECL_CONTEXT (decl_op1)
33332 != DECL_CONTEXT (decl_op0));
33333 }
33334
33335 return true;
33336 }
33337 }
33338
33339 return false;
33340 }
33341
33342 /* Return TRUE if X is a reference to a value in a constant pool. */
33343 extern bool
33344 arm_is_constant_pool_ref (rtx x)
33345 {
33346 return (MEM_P (x)
33347 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33348 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33349 }
33350
33351 /* Remember the last target of arm_set_current_function. */
33352 static GTY(()) tree arm_previous_fndecl;
33353
33354 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33355
33356 void
33357 save_restore_target_globals (tree new_tree)
33358 {
33359 /* If we have a previous state, use it. */
33360 if (TREE_TARGET_GLOBALS (new_tree))
33361 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33362 else if (new_tree == target_option_default_node)
33363 restore_target_globals (&default_target_globals);
33364 else
33365 {
33366 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33367 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33368 }
33369
33370 arm_option_params_internal ();
33371 }
33372
33373 /* Invalidate arm_previous_fndecl. */
33374
33375 void
33376 arm_reset_previous_fndecl (void)
33377 {
33378 arm_previous_fndecl = NULL_TREE;
33379 }
33380
33381 /* Establish appropriate back-end context for processing the function
33382 FNDECL. The argument might be NULL to indicate processing at top
33383 level, outside of any function scope. */
33384
33385 static void
33386 arm_set_current_function (tree fndecl)
33387 {
33388 if (!fndecl || fndecl == arm_previous_fndecl)
33389 return;
33390
33391 tree old_tree = (arm_previous_fndecl
33392 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33393 : NULL_TREE);
33394
33395 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33396
33397 /* If current function has no attributes but previous one did,
33398 use the default node. */
33399 if (! new_tree && old_tree)
33400 new_tree = target_option_default_node;
33401
33402 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33403 the default have been handled by save_restore_target_globals from
33404 arm_pragma_target_parse. */
33405 if (old_tree == new_tree)
33406 return;
33407
33408 arm_previous_fndecl = fndecl;
33409
33410 /* First set the target options. */
33411 cl_target_option_restore (&global_options, &global_options_set,
33412 TREE_TARGET_OPTION (new_tree));
33413
33414 save_restore_target_globals (new_tree);
33415
33416 arm_override_options_after_change_1 (&global_options, &global_options_set);
33417 }
33418
33419 /* Implement TARGET_OPTION_PRINT. */
33420
33421 static void
33422 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33423 {
33424 int flags = ptr->x_target_flags;
33425 const char *fpu_name;
33426
33427 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33428 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33429
33430 fprintf (file, "%*sselected isa %s\n", indent, "",
33431 TARGET_THUMB2_P (flags) ? "thumb2" :
33432 TARGET_THUMB_P (flags) ? "thumb1" :
33433 "arm");
33434
33435 if (ptr->x_arm_arch_string)
33436 fprintf (file, "%*sselected architecture %s\n", indent, "",
33437 ptr->x_arm_arch_string);
33438
33439 if (ptr->x_arm_cpu_string)
33440 fprintf (file, "%*sselected CPU %s\n", indent, "",
33441 ptr->x_arm_cpu_string);
33442
33443 if (ptr->x_arm_tune_string)
33444 fprintf (file, "%*sselected tune %s\n", indent, "",
33445 ptr->x_arm_tune_string);
33446
33447 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33448 }
33449
33450 /* Hook to determine if one function can safely inline another. */
33451
33452 static bool
33453 arm_can_inline_p (tree caller, tree callee)
33454 {
33455 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33456 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33457 bool can_inline = true;
33458
33459 struct cl_target_option *caller_opts
33460 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33461 : target_option_default_node);
33462
33463 struct cl_target_option *callee_opts
33464 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33465 : target_option_default_node);
33466
33467 if (callee_opts == caller_opts)
33468 return true;
33469
33470 /* Callee's ISA features should be a subset of the caller's. */
33471 struct arm_build_target caller_target;
33472 struct arm_build_target callee_target;
33473 caller_target.isa = sbitmap_alloc (isa_num_bits);
33474 callee_target.isa = sbitmap_alloc (isa_num_bits);
33475
33476 arm_configure_build_target (&caller_target, caller_opts, false);
33477 arm_configure_build_target (&callee_target, callee_opts, false);
33478 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33479 can_inline = false;
33480
33481 sbitmap_free (caller_target.isa);
33482 sbitmap_free (callee_target.isa);
33483
33484 /* OK to inline between different modes.
33485 Function with mode specific instructions, e.g using asm,
33486 must be explicitly protected with noinline. */
33487 return can_inline;
33488 }
33489
33490 /* Hook to fix function's alignment affected by target attribute. */
33491
33492 static void
33493 arm_relayout_function (tree fndecl)
33494 {
33495 if (DECL_USER_ALIGN (fndecl))
33496 return;
33497
33498 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33499
33500 if (!callee_tree)
33501 callee_tree = target_option_default_node;
33502
33503 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33504 SET_DECL_ALIGN
33505 (fndecl,
33506 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33507 }
33508
33509 /* Inner function to process the attribute((target(...))), take an argument and
33510 set the current options from the argument. If we have a list, recursively
33511 go over the list. */
33512
33513 static bool
33514 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33515 {
33516 if (TREE_CODE (args) == TREE_LIST)
33517 {
33518 bool ret = true;
33519
33520 for (; args; args = TREE_CHAIN (args))
33521 if (TREE_VALUE (args)
33522 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33523 ret = false;
33524 return ret;
33525 }
33526
33527 else if (TREE_CODE (args) != STRING_CST)
33528 {
33529 error ("attribute %<target%> argument not a string");
33530 return false;
33531 }
33532
33533 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33534 char *q;
33535
33536 while ((q = strtok (argstr, ",")) != NULL)
33537 {
33538 argstr = NULL;
33539 if (!strcmp (q, "thumb"))
33540 {
33541 opts->x_target_flags |= MASK_THUMB;
33542 if (TARGET_FDPIC && !arm_arch_thumb2)
33543 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33544 }
33545
33546 else if (!strcmp (q, "arm"))
33547 opts->x_target_flags &= ~MASK_THUMB;
33548
33549 else if (!strcmp (q, "general-regs-only"))
33550 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33551
33552 else if (startswith (q, "fpu="))
33553 {
33554 int fpu_index;
33555 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33556 &fpu_index, CL_TARGET))
33557 {
33558 error ("invalid fpu for target attribute or pragma %qs", q);
33559 return false;
33560 }
33561 if (fpu_index == TARGET_FPU_auto)
33562 {
33563 /* This doesn't really make sense until we support
33564 general dynamic selection of the architecture and all
33565 sub-features. */
33566 sorry ("auto fpu selection not currently permitted here");
33567 return false;
33568 }
33569 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33570 }
33571 else if (startswith (q, "arch="))
33572 {
33573 char *arch = q + 5;
33574 const arch_option *arm_selected_arch
33575 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33576
33577 if (!arm_selected_arch)
33578 {
33579 error ("invalid architecture for target attribute or pragma %qs",
33580 q);
33581 return false;
33582 }
33583
33584 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33585 }
33586 else if (q[0] == '+')
33587 {
33588 opts->x_arm_arch_string
33589 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33590 }
33591 else
33592 {
33593 error ("unknown target attribute or pragma %qs", q);
33594 return false;
33595 }
33596 }
33597
33598 return true;
33599 }
33600
33601 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33602
33603 tree
33604 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33605 struct gcc_options *opts_set)
33606 {
33607 struct cl_target_option cl_opts;
33608
33609 if (!arm_valid_target_attribute_rec (args, opts))
33610 return NULL_TREE;
33611
33612 cl_target_option_save (&cl_opts, opts, opts_set);
33613 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33614 arm_option_check_internal (opts);
33615 /* Do any overrides, such as global options arch=xxx.
33616 We do this since arm_active_target was overridden. */
33617 arm_option_reconfigure_globals ();
33618 arm_options_perform_arch_sanity_checks ();
33619 arm_option_override_internal (opts, opts_set);
33620
33621 return build_target_option_node (opts, opts_set);
33622 }
33623
33624 static void
33625 add_attribute (const char * mode, tree *attributes)
33626 {
33627 size_t len = strlen (mode);
33628 tree value = build_string (len, mode);
33629
33630 TREE_TYPE (value) = build_array_type (char_type_node,
33631 build_index_type (size_int (len)));
33632
33633 *attributes = tree_cons (get_identifier ("target"),
33634 build_tree_list (NULL_TREE, value),
33635 *attributes);
33636 }
33637
33638 /* For testing. Insert thumb or arm modes alternatively on functions. */
33639
33640 static void
33641 arm_insert_attributes (tree fndecl, tree * attributes)
33642 {
33643 const char *mode;
33644
33645 if (! TARGET_FLIP_THUMB)
33646 return;
33647
33648 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33649 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33650 return;
33651
33652 /* Nested definitions must inherit mode. */
33653 if (current_function_decl)
33654 {
33655 mode = TARGET_THUMB ? "thumb" : "arm";
33656 add_attribute (mode, attributes);
33657 return;
33658 }
33659
33660 /* If there is already a setting don't change it. */
33661 if (lookup_attribute ("target", *attributes) != NULL)
33662 return;
33663
33664 mode = thumb_flipper ? "thumb" : "arm";
33665 add_attribute (mode, attributes);
33666
33667 thumb_flipper = !thumb_flipper;
33668 }
33669
33670 /* Hook to validate attribute((target("string"))). */
33671
33672 static bool
33673 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33674 tree args, int ARG_UNUSED (flags))
33675 {
33676 bool ret = true;
33677 struct gcc_options func_options, func_options_set;
33678 tree cur_tree, new_optimize;
33679 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33680
33681 /* Get the optimization options of the current function. */
33682 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33683
33684 /* If the function changed the optimization levels as well as setting target
33685 options, start with the optimizations specified. */
33686 if (!func_optimize)
33687 func_optimize = optimization_default_node;
33688
33689 /* Init func_options. */
33690 memset (&func_options, 0, sizeof (func_options));
33691 init_options_struct (&func_options, NULL);
33692 lang_hooks.init_options_struct (&func_options);
33693 memset (&func_options_set, 0, sizeof (func_options_set));
33694
33695 /* Initialize func_options to the defaults. */
33696 cl_optimization_restore (&func_options, &func_options_set,
33697 TREE_OPTIMIZATION (func_optimize));
33698
33699 cl_target_option_restore (&func_options, &func_options_set,
33700 TREE_TARGET_OPTION (target_option_default_node));
33701
33702 /* Set func_options flags with new target mode. */
33703 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33704 &func_options_set);
33705
33706 if (cur_tree == NULL_TREE)
33707 ret = false;
33708
33709 new_optimize = build_optimization_node (&func_options, &func_options_set);
33710
33711 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33712
33713 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33714
33715 return ret;
33716 }
33717
33718 /* Match an ISA feature bitmap to a named FPU. We always use the
33719 first entry that exactly matches the feature set, so that we
33720 effectively canonicalize the FPU name for the assembler. */
33721 static const char*
33722 arm_identify_fpu_from_isa (sbitmap isa)
33723 {
33724 auto_sbitmap fpubits (isa_num_bits);
33725 auto_sbitmap cand_fpubits (isa_num_bits);
33726
33727 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33728
33729 /* If there are no ISA feature bits relating to the FPU, we must be
33730 doing soft-float. */
33731 if (bitmap_empty_p (fpubits))
33732 return "softvfp";
33733
33734 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33735 {
33736 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33737 if (bitmap_equal_p (fpubits, cand_fpubits))
33738 return all_fpus[i].name;
33739 }
33740 /* We must find an entry, or things have gone wrong. */
33741 gcc_unreachable ();
33742 }
33743
33744 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33745 by the function fndecl. */
33746 void
33747 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33748 {
33749 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33750
33751 struct cl_target_option *targ_options;
33752 if (target_parts)
33753 targ_options = TREE_TARGET_OPTION (target_parts);
33754 else
33755 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33756 gcc_assert (targ_options);
33757
33758 arm_print_asm_arch_directives (stream, targ_options);
33759
33760 fprintf (stream, "\t.syntax unified\n");
33761
33762 if (TARGET_THUMB)
33763 {
33764 if (is_called_in_ARM_mode (decl)
33765 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33766 && cfun->is_thunk))
33767 fprintf (stream, "\t.code 32\n");
33768 else if (TARGET_THUMB1)
33769 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33770 else
33771 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33772 }
33773 else
33774 fprintf (stream, "\t.arm\n");
33775
33776 if (TARGET_POKE_FUNCTION_NAME)
33777 arm_poke_function_name (stream, (const char *) name);
33778 }
33779
33780 /* If MEM is in the form of [base+offset], extract the two parts
33781 of the address and set them in BASE and OFFSET; otherwise return
33782 false after clearing BASE and OFFSET. */
33783
33784 static bool
33785 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33786 {
33787 rtx addr;
33788
33789 gcc_assert (MEM_P (mem));
33790
33791 addr = XEXP (mem, 0);
33792
33793 /* Strip off const from addresses like (const (addr)). */
33794 if (GET_CODE (addr) == CONST)
33795 addr = XEXP (addr, 0);
33796
33797 if (REG_P (addr))
33798 {
33799 *base = addr;
33800 *offset = const0_rtx;
33801 return true;
33802 }
33803
33804 if (GET_CODE (addr) == PLUS
33805 && GET_CODE (XEXP (addr, 0)) == REG
33806 && CONST_INT_P (XEXP (addr, 1)))
33807 {
33808 *base = XEXP (addr, 0);
33809 *offset = XEXP (addr, 1);
33810 return true;
33811 }
33812
33813 *base = NULL_RTX;
33814 *offset = NULL_RTX;
33815
33816 return false;
33817 }
33818
33819 /* If INSN is a load or store of an address in the form of [base+offset],
33820 extract the two parts and set them in BASE and OFFSET. IS_LOAD is set
33821 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33822 otherwise return FALSE. */
33823
33824 static bool
33825 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33826 {
33827 rtx x, dest, src;
33828
33829 gcc_assert (INSN_P (insn));
33830 x = PATTERN (insn);
33831 if (GET_CODE (x) != SET)
33832 return false;
33833
33834 src = SET_SRC (x);
33835 dest = SET_DEST (x);
33836 if (REG_P (src) && MEM_P (dest))
33837 {
33838 *is_load = false;
33839 extract_base_offset_in_addr (dest, base, offset);
33840 }
33841 else if (MEM_P (src) && REG_P (dest))
33842 {
33843 *is_load = true;
33844 extract_base_offset_in_addr (src, base, offset);
33845 }
33846 else
33847 return false;
33848
33849 return (*base != NULL_RTX && *offset != NULL_RTX);
33850 }
33851
33852 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33853
33854 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33855 and PRI are only calculated for these instructions. For other instructions,
33856 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33857 instruction fusion can be supported by returning different priorities.
33858
33859 It's important that irrelevant instructions get the largest FUSION_PRI. */
33860
33861 static void
33862 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33863 int *fusion_pri, int *pri)
33864 {
33865 int tmp, off_val;
33866 bool is_load;
33867 rtx base, offset;
33868
33869 gcc_assert (INSN_P (insn));
33870
33871 tmp = max_pri - 1;
33872 if (!fusion_load_store (insn, &base, &offset, &is_load))
33873 {
33874 *pri = tmp;
33875 *fusion_pri = tmp;
33876 return;
33877 }
33878
33879 /* Load goes first. */
33880 if (is_load)
33881 *fusion_pri = tmp - 1;
33882 else
33883 *fusion_pri = tmp - 2;
33884
33885 tmp /= 2;
33886
33887 /* INSN with smaller base register goes first. */
33888 tmp -= ((REGNO (base) & 0xff) << 20);
33889
33890 /* INSN with smaller offset goes first. */
33891 off_val = (int)(INTVAL (offset));
33892 if (off_val >= 0)
33893 tmp -= (off_val & 0xfffff);
33894 else
33895 tmp += ((- off_val) & 0xfffff);
33896
33897 *pri = tmp;
33898 return;
33899 }
33900
33901
33902 /* Construct and return a PARALLEL RTX vector with elements numbering the
33903 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33904 the vector - from the perspective of the architecture. This does not
33905 line up with GCC's perspective on lane numbers, so we end up with
33906 different masks depending on our target endian-ness. The diagram
33907 below may help. We must draw the distinction when building masks
33908 which select one half of the vector. An instruction selecting
33909 architectural low-lanes for a big-endian target, must be described using
33910 a mask selecting GCC high-lanes.
33911
33912 Big-Endian Little-Endian
33913
33914 GCC 0 1 2 3 3 2 1 0
33915 | x | x | x | x | | x | x | x | x |
33916 Architecture 3 2 1 0 3 2 1 0
33917
33918 Low Mask: { 2, 3 } { 0, 1 }
33919 High Mask: { 0, 1 } { 2, 3 }
33920 */
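/* For instance, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian, but
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   table above. */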
33921
33922 rtx
33923 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33924 {
33925 int nunits = GET_MODE_NUNITS (mode);
33926 rtvec v = rtvec_alloc (nunits / 2);
33927 int high_base = nunits / 2;
33928 int low_base = 0;
33929 int base;
33930 rtx t1;
33931 int i;
33932
33933 if (BYTES_BIG_ENDIAN)
33934 base = high ? low_base : high_base;
33935 else
33936 base = high ? high_base : low_base;
33937
33938 for (i = 0; i < nunits / 2; i++)
33939 RTVEC_ELT (v, i) = GEN_INT (base + i);
33940
33941 t1 = gen_rtx_PARALLEL (mode, v);
33942 return t1;
33943 }
33944
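/* For example, for V4SImode with HIGH == true this returns
   (parallel:V4SI [(const_int 2) (const_int 3)]) on a little-endian target
   and (parallel:V4SI [(const_int 0) (const_int 1)]) on a big-endian target,
   matching the "High Mask" row of the diagram above.  */
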
33945 /* Check OP for validity as a PARALLEL RTX vector with elements
33946 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33947 from the perspective of the architecture. See the diagram above
33948 arm_simd_vect_par_cnst_half for more details. */
33949
33950 bool
33951 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33952 bool high)
33953 {
33954 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33955 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33956 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33957 int i = 0;
33958
33959 if (!VECTOR_MODE_P (mode))
33960 return false;
33961
33962 if (count_op != count_ideal)
33963 return false;
33964
33965 for (i = 0; i < count_ideal; i++)
33966 {
33967 rtx elt_op = XVECEXP (op, 0, i);
33968 rtx elt_ideal = XVECEXP (ideal, 0, i);
33969
33970 if (!CONST_INT_P (elt_op)
33971 || INTVAL (elt_ideal) != INTVAL (elt_op))
33972 return false;
33973 }
33974 return true;
33975 }
33976
33977 /* We can output an mi_thunk for all cases except for a non-zero vcall_offset
33978 in Thumb1. */
33979 static bool
33980 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33981 const_tree)
33982 {
33983 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33984 if (vcall_offset && TARGET_THUMB1)
33985 return false;
33986
33987 /* Otherwise ok. */
33988 return true;
33989 }
33990
33991 /* Generate RTL for a conditional branch with rtx comparison CODE in
33992 mode CC_MODE. The destination of the unlikely conditional branch
33993 is LABEL_REF. */
33994
33995 void
33996 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33997 rtx label_ref)
33998 {
33999 rtx x;
34000 x = gen_rtx_fmt_ee (code, VOIDmode,
34001 gen_rtx_REG (cc_mode, CC_REGNUM),
34002 const0_rtx);
34003
34004 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34005 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34006 pc_rtx);
34007 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
34008 }
34009
34010 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34011
34012 For pure-code sections there is no letter code for this attribute, so
34013 output all the section flags numerically when this is needed. */
34014
34015 static bool
34016 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34017 {
34018
34019 if (flags & SECTION_ARM_PURECODE)
34020 {
34021 *num = 0x20000000;
34022
34023 if (!(flags & SECTION_DEBUG))
34024 *num |= 0x2;
34025 if (flags & SECTION_EXCLUDE)
34026 *num |= 0x80000000;
34027 if (flags & SECTION_WRITE)
34028 *num |= 0x1;
34029 if (flags & SECTION_CODE)
34030 *num |= 0x4;
34031 if (flags & SECTION_MERGE)
34032 *num |= 0x10;
34033 if (flags & SECTION_STRINGS)
34034 *num |= 0x20;
34035 if (flags & SECTION_TLS)
34036 *num |= 0x400;
34037 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34038 *num |= 0x200;
34039
34040 return true;
34041 }
34042
34043 return false;
34044 }
34045
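/* For example, a plain -mpure-code text section (SECTION_CODE set,
   SECTION_DEBUG and the other flags above clear) is emitted with
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together
   with the usual SHF_ALLOC and SHF_EXECINSTR bits.  */
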
34046 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34047
34048 If pure-code is passed as an option, make sure all functions are in
34049 sections that have the SHF_ARM_PURECODE attribute. */
34050
34051 static section *
34052 arm_function_section (tree decl, enum node_frequency freq,
34053 bool startup, bool exit)
34054 {
34055 const char * section_name;
34056 section * sec;
34057
34058 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34059 return default_function_section (decl, freq, startup, exit);
34060
34061 if (!target_pure_code)
34062 return default_function_section (decl, freq, startup, exit);
34063
34064
34065 section_name = DECL_SECTION_NAME (decl);
34066
34067 /* If a function is not in a named section then it falls under the 'default'
34068 text section, also known as '.text'. We can preserve previous behavior as
34069 the default text section already has the SHF_ARM_PURECODE section
34070 attribute. */
34071 if (!section_name)
34072 {
34073 section *default_sec = default_function_section (decl, freq, startup,
34074 exit);
34075
34076 /* If default_sec is not null, then it must be a special section like for
34077 example .text.startup. We set the pure-code attribute and return the
34078 same section to preserve existing behavior. */
34079 if (default_sec)
34080 default_sec->common.flags |= SECTION_ARM_PURECODE;
34081 return default_sec;
34082 }
34083
34084 /* Otherwise look whether a section has already been created with
34085 'section_name'. */
34086 sec = get_named_section (decl, section_name, 0);
34087 if (!sec)
34088 /* If that is not the case, passing NULL as the section's name to
34089 'get_named_section' will create a section with the declaration's
34090 section name. */
34091 sec = get_named_section (decl, NULL, 0);
34092
34093 /* Set the SHF_ARM_PURECODE attribute. */
34094 sec->common.flags |= SECTION_ARM_PURECODE;
34095
34096 return sec;
34097 }
34098
34099 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
34100
34101 If DECL is a function declaration and pure-code is passed as an option
34102 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
34103 section's name and RELOC indicates whether the declaration's initializer may
34104 contain runtime relocations. */
34105
34106 static unsigned int
34107 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34108 {
34109 unsigned int flags = default_section_type_flags (decl, name, reloc);
34110
34111 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34112 flags |= SECTION_ARM_PURECODE;
34113
34114 return flags;
34115 }
34116
34117 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34118
34119 static void
34120 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34121 rtx op0, rtx op1,
34122 rtx *quot_p, rtx *rem_p)
34123 {
34124 if (mode == SImode)
34125 gcc_assert (!TARGET_IDIV);
34126
34127 scalar_int_mode libval_mode
34128 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34129
34130 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34131 libval_mode, op0, mode, op1, mode);
34132
34133 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34134 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34135 GET_MODE_SIZE (mode));
34136
34137 gcc_assert (quotient);
34138 gcc_assert (remainder);
34139
34140 *quot_p = quotient;
34141 *rem_p = remainder;
34142 }
34143
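/* For example, for an SImode division on a core without hardware integer
   divide, LIBVAL_MODE is DImode: the library call returns the quotient and
   remainder packed into a single DImode value, from which the quotient is
   extracted as the subreg at byte 0 and the remainder as the subreg at byte
   GET_MODE_SIZE (SImode), i.e. 4.  */
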
34144 /* This function checks for the availability of the coprocessor builtin passed
34145 in BUILTIN for the current target. Returns true if it is available and
34146 false otherwise. If a BUILTIN is passed for which this function has not
34147 been implemented it will cause an internal compiler error. */
34148
34149 bool
34150 arm_coproc_builtin_available (enum unspecv builtin)
34151 {
34152 /* None of these builtins are available in Thumb mode if the target only
34153 supports Thumb-1. */
34154 if (TARGET_THUMB1)
34155 return false;
34156
34157 switch (builtin)
34158 {
34159 case VUNSPEC_CDP:
34160 case VUNSPEC_LDC:
34161 case VUNSPEC_LDCL:
34162 case VUNSPEC_STC:
34163 case VUNSPEC_STCL:
34164 case VUNSPEC_MCR:
34165 case VUNSPEC_MRC:
34166 if (arm_arch4)
34167 return true;
34168 break;
34169 case VUNSPEC_CDP2:
34170 case VUNSPEC_LDC2:
34171 case VUNSPEC_LDC2L:
34172 case VUNSPEC_STC2:
34173 case VUNSPEC_STC2L:
34174 case VUNSPEC_MCR2:
34175 case VUNSPEC_MRC2:
34176 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34177 ARMv8-{A,M}. */
34178 if (arm_arch5t)
34179 return true;
34180 break;
34181 case VUNSPEC_MCRR:
34182 case VUNSPEC_MRRC:
34183 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34184 ARMv8-{A,M}. */
34185 if (arm_arch6 || arm_arch5te)
34186 return true;
34187 break;
34188 case VUNSPEC_MCRR2:
34189 case VUNSPEC_MRRC2:
34190 if (arm_arch6)
34191 return true;
34192 break;
34193 default:
34194 gcc_unreachable ();
34195 }
34196 return false;
34197 }
34198
34199 /* This function returns true if OP is a valid memory operand for the ldc and
34200 stc coprocessor instructions and false otherwise. */
34201
34202 bool
34203 arm_coproc_ldc_stc_legitimate_address (rtx op)
34204 {
34205 HOST_WIDE_INT range;
34206 /* Has to be a memory operand. */
34207 if (!MEM_P (op))
34208 return false;
34209
34210 op = XEXP (op, 0);
34211
34212 /* We accept registers. */
34213 if (REG_P (op))
34214 return true;
34215
34216 switch (GET_CODE (op))
34217 {
34218 case PLUS:
34219 {
34220 /* Or registers with an offset. */
34221 if (!REG_P (XEXP (op, 0)))
34222 return false;
34223
34224 op = XEXP (op, 1);
34225
34226 /* The offset must be an immediate though. */
34227 if (!CONST_INT_P (op))
34228 return false;
34229
34230 range = INTVAL (op);
34231
34232 /* Within the range of [-1020,1020]. */
34233 if (!IN_RANGE (range, -1020, 1020))
34234 return false;
34235
34236 /* And a multiple of 4. */
34237 return (range % 4) == 0;
34238 }
34239 case PRE_INC:
34240 case POST_INC:
34241 case PRE_DEC:
34242 case POST_DEC:
34243 return REG_P (XEXP (op, 0));
34244 default:
34245 gcc_unreachable ();
34246 }
34247 return false;
34248 }
34249
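/* For example, [r0], [r2, #8] and [r2, #-1020] are legitimate here, while
   [r2, #2] is rejected (not a multiple of 4) and [r2, #1024] is rejected
   (outside [-1020, 1020]); pre/post increment and decrement forms are
   accepted as long as they use a plain register.  */
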
34250 /* Return the diagnostic message string if conversion from FROMTYPE to
34251 TOTYPE is not allowed, NULL otherwise. */
34252
34253 static const char *
34254 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34255 {
34256 if (element_mode (fromtype) != element_mode (totype))
34257 {
34258 /* Do not allow conversions to/from BFmode scalar types. */
34259 if (TYPE_MODE (fromtype) == BFmode)
34260 return N_("invalid conversion from type %<bfloat16_t%>");
34261 if (TYPE_MODE (totype) == BFmode)
34262 return N_("invalid conversion to type %<bfloat16_t%>");
34263 }
34264
34265 /* Conversion allowed. */
34266 return NULL;
34267 }
34268
34269 /* Return the diagnostic message string if the unary operation OP is
34270 not permitted on TYPE, NULL otherwise. */
34271
34272 static const char *
34273 arm_invalid_unary_op (int op, const_tree type)
34274 {
34275 /* Reject all single-operand operations on BFmode except for &. */
34276 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34277 return N_("operation not permitted on type %<bfloat16_t%>");
34278
34279 /* Operation allowed. */
34280 return NULL;
34281 }
34282
34283 /* Return the diagnostic message string if the binary operation OP is
34284 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34285
34286 static const char *
34287 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34288 const_tree type2)
34289 {
34290 /* Reject all 2-operand operations on BFmode. */
34291 if (element_mode (type1) == BFmode
34292 || element_mode (type2) == BFmode)
34293 return N_("operation not permitted on type %<bfloat16_t%>");
34294
34295 /* Operation allowed. */
34296 return NULL;
34297 }
34298
34299 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34300
34301 In VFPv1, VFP registers could only be accessed in the mode they were
34302 set, so subregs would be invalid there. However, we don't support
34303 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34304
34305 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34306 VFP registers in little-endian order. We can't describe that accurately to
34307 GCC, so avoid taking subregs of such values.
34308
34309 The only exception is going from a 128-bit to a 64-bit type. In that
34310 case the data layout happens to be consistent for big-endian, so we
34311 explicitly allow that case. */
34312
34313 static bool
34314 arm_can_change_mode_class (machine_mode from, machine_mode to,
34315 reg_class_t rclass)
34316 {
34317 if (TARGET_BIG_END
34318 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34319 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34320 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34321 && reg_classes_intersect_p (VFP_REGS, rclass))
34322 return false;
34323 return true;
34324 }
34325
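/* For example, on a big-endian target a subreg that accesses a DFmode VFP
   value as SImode is rejected (8 bytes is wider than UNITS_PER_WORD), while
   the 128-bit to 64-bit case such as V2DImode to DImode remains allowed, and
   on little-endian targets every mode change is accepted.  */
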
34326 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34327 strcpy from constants will be faster. */
34328
34329 static HOST_WIDE_INT
34330 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34331 {
34332 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34333 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34334 return MAX (align, BITS_PER_WORD * factor);
34335 return align;
34336 }
34337
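/* For example, when not optimizing for size a string constant gets at least
   BITS_PER_WORD (32-bit) alignment, or twice that when tuning for XScale in
   ARM mode; other constants keep whatever alignment they already had.  */
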
34338 /* Emit a speculation barrier on target architectures that do not have
34339 DSB/ISB directly. Such systems probably don't need a barrier
34340 themselves, but if the code is ever run on a later architecture, it
34341 might become a problem. */
34342 void
34343 arm_emit_speculation_barrier_function ()
34344 {
34345 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34346 }
34347
34348 /* Have we recorded an explicit access to the Q bit of APSR? */
34349 bool
34350 arm_q_bit_access (void)
34351 {
34352 if (cfun && cfun->decl)
34353 return lookup_attribute ("acle qbit",
34354 DECL_ATTRIBUTES (cfun->decl));
34355 return true;
34356 }
34357
34358 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34359 bool
34360 arm_ge_bits_access (void)
34361 {
34362 if (cfun && cfun->decl)
34363 return lookup_attribute ("acle gebits",
34364 DECL_ATTRIBUTES (cfun->decl));
34365 return true;
34366 }
34367
34368 /* Return NULL if insn INSN is valid within a low-overhead loop.
34369 Otherwise return a string explaining why doloop cannot be applied. */
34370
34371 static const char *
34372 arm_invalid_within_doloop (const rtx_insn *insn)
34373 {
34374 if (!TARGET_HAVE_LOB)
34375 return default_invalid_within_doloop (insn);
34376
34377 if (CALL_P (insn))
34378 return "Function call in the loop.";
34379
34380 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34381 return "LR is used inside loop.";
34382
34383 return NULL;
34384 }
34385
34386 bool
34387 arm_target_insn_ok_for_lob (rtx insn)
34388 {
34389 basic_block bb = BLOCK_FOR_INSN (insn);
34390 /* Make sure the basic block of the target insn is a simple latch
34391 whose single predecessor and single successor are the body of the loop
34392 itself. Only simple loops with a single basic block as body are
34393 supported for 'low-overhead loop', making sure that the LE target is
34394 above LE itself in the generated code. */
34395
34396 return single_succ_p (bb)
34397 && single_pred_p (bb)
34398 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34399 && contains_no_active_insn_p (bb);
34400 }
34401
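/* For example, a doloop whose body is a single basic block ending in the LE
   branch qualifies when the branch target is an empty latch block whose only
   predecessor and only successor are both that body block; any extra blocks,
   or active insns in the latch, make the loop unsuitable.  */
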
34402 #if CHECKING_P
34403 namespace selftest {
34404
34405 /* Scan the static data tables generated by parsecpu.awk looking for
34406 potential issues with the data. We primarily check for
34407 inconsistencies in the option extensions at present (extensions
34408 that duplicate others but aren't marked as aliases). Furthermore,
34409 for correct canonicalization later options must never be a subset
34410 of an earlier option. Any extension should also only specify other
34411 feature bits and never an architecture bit. The architecture is inferred
34412 from the declaration of the extension. */
34413 static void
34414 arm_test_cpu_arch_data (void)
34415 {
34416 const arch_option *arch;
34417 const cpu_option *cpu;
34418 auto_sbitmap target_isa (isa_num_bits);
34419 auto_sbitmap isa1 (isa_num_bits);
34420 auto_sbitmap isa2 (isa_num_bits);
34421
34422 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34423 {
34424 const cpu_arch_extension *ext1, *ext2;
34425
34426 if (arch->common.extensions == NULL)
34427 continue;
34428
34429 arm_initialize_isa (target_isa, arch->common.isa_bits);
34430
34431 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34432 {
34433 if (ext1->alias)
34434 continue;
34435
34436 arm_initialize_isa (isa1, ext1->isa_bits);
34437 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34438 {
34439 if (ext2->alias || ext1->remove != ext2->remove)
34440 continue;
34441
34442 arm_initialize_isa (isa2, ext2->isa_bits);
34443 /* If the option is a subset of the parent option, it doesn't
34444 add anything and so isn't useful. */
34445 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34446
34447 /* If the extension specifies any architectural bits then
34448 disallow it. Extensions should only specify feature bits. */
34449 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34450 }
34451 }
34452 }
34453
34454 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34455 {
34456 const cpu_arch_extension *ext1, *ext2;
34457
34458 if (cpu->common.extensions == NULL)
34459 continue;
34460
34461 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34462
34463 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34464 {
34465 if (ext1->alias)
34466 continue;
34467
34468 arm_initialize_isa (isa1, ext1->isa_bits);
34469 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34470 {
34471 if (ext2->alias || ext1->remove != ext2->remove)
34472 continue;
34473
34474 arm_initialize_isa (isa2, ext2->isa_bits);
34475 /* If the option is a subset of the parent option, it doesn't
34476 add anything and so isn't useful. */
34477 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34478
34479 /* If the extension specifies any architectural bits then
34480 disallow it. Extensions should only specify feature bits. */
34481 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34482 }
34483 }
34484 }
34485 }
34486
34487 /* Scan the static data tables generated by parsecpu.awk looking for
34488 potential issues with the data. Here we check for consistency between the
34489 fpu bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
34490 a feature bit that is not defined by any FPU flag. */
34491 static void
34492 arm_test_fpu_data (void)
34493 {
34494 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34495 auto_sbitmap fpubits (isa_num_bits);
34496 auto_sbitmap tmpset (isa_num_bits);
34497
34498 static const enum isa_feature fpu_bitlist_internal[]
34499 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34500 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34501
34502 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34503 {
34504 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34505 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34506 bitmap_clear (isa_all_fpubits_internal);
34507 bitmap_copy (isa_all_fpubits_internal, tmpset);
34508 }
34509
34510 if (!bitmap_empty_p (isa_all_fpubits_internal))
34511 {
34512 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34513 " group that are not defined by any FPU.\n"
34514 " Check your arm-cpus.in.\n");
34515 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34516 }
34517 }
34518
34519 static void
34520 arm_run_selftests (void)
34521 {
34522 arm_test_cpu_arch_data ();
34523 arm_test_fpu_data ();
34524 }
34525 } /* Namespace selftest. */
34526
34527 #undef TARGET_RUN_TARGET_SELFTESTS
34528 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34529 #endif /* CHECKING_P */
34530
34531 /* Implement TARGET_STACK_PROTECT_GUARD. In the case of a
34532 global-variable-based guard, use the default; otherwise
34533 return a null tree. */
34534 static tree
34535 arm_stack_protect_guard (void)
34536 {
34537 if (arm_stack_protector_guard == SSP_GLOBAL)
34538 return default_stack_protect_guard ();
34539
34540 return NULL_TREE;
34541 }
34542
34543 /* Worker function for TARGET_MD_ASM_ADJUST when in Thumb1 mode.
34544 Unlike the arm version, we do NOT implement asm flag outputs. */
34545
34546 rtx_insn *
34547 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34548 vec<machine_mode> & /*input_modes*/,
34549 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34550 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34551 {
34552 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34553 if (startswith (constraints[i], "=@cc"))
34554 {
34555 sorry ("%<asm%> flags not supported in thumb1 mode");
34556 break;
34557 }
34558 return NULL;
34559 }
34560
34561 /* Generate code to enable conditional branches in functions over 1 MiB.
34562 Parameters are:
34563 operands: the operands list of the asm insn (see arm_cond_branch or
34564 arm_cond_branch_reversed).
34565 pos_label: an index into the operands array; operands[pos_label] is
34566 the asm label of the final jump destination.
34567 dest: a string used to generate the asm label of the intermediate
34568 destination.
34569 branch_format: a string denoting the intermediate branch format, e.g.
34570 "beq", "bne", etc. */
34571
34572 const char *
34573 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34574 const char * branch_format)
34575 {
34576 rtx_code_label * tmp_label = gen_label_rtx ();
34577 char label_buf[256];
34578 char buffer[128];
34579 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
34580 CODE_LABEL_NUMBER (tmp_label));
34581 const char *label_ptr = arm_strip_name_encoding (label_buf);
34582 rtx dest_label = operands[pos_label];
34583 operands[pos_label] = tmp_label;
34584
34585 snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
34586 output_asm_insn (buffer, operands);
34587
34588 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34589 operands[pos_label] = dest_label;
34590 output_asm_insn (buffer, operands);
34591 return "";
34592 }
34593
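/* For example, with BRANCH_FORMAT "beq\t" this first emits the short
   conditional branch to a freshly generated local label (named from DEST),
   then an unconditional "b" to the original far destination in
   operands[POS_LABEL], and finally the local label itself; the condition in
   BRANCH_FORMAT therefore guards the fall-through around the far branch,
   i.e. it is the inverse of the condition under which the far destination
   is reached.  */
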
34594 /* If the given mode matches, restrict base registers for loads from memory
34595 to LO_REGS (i.e. [Rn], Rn <= LO_REGS). */
34596 enum reg_class
34597 arm_mode_base_reg_class (machine_mode mode)
34598 {
34599 if (TARGET_HAVE_MVE
34600 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34601 return LO_REGS;
34602
34603 return MODE_BASE_REG_REG_CLASS (mode);
34604 }
34605
34606 struct gcc_target targetm = TARGET_INITIALIZER;
34607
34608 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34609
34610 opt_machine_mode
34611 arm_get_mask_mode (machine_mode mode)
34612 {
34613 if (TARGET_HAVE_MVE)
34614 return arm_mode_to_pred_mode (mode);
34615
34616 return default_get_mask_mode (mode);
34617 }
34618
34619 #include "gt-arm.h"